浏览代码

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleX into gui_compatibilization

will-jl944 4 年之前
父节点
当前提交
0e0c62a236

+ 14 - 8
dygraph/paddlex/command.py

@@ -80,7 +80,7 @@ def arg_parser():
         "--fixed_input_shape",
         "--fixed_input_shape",
         "-fs",
         "-fs",
         default=None,
         default=None,
-        help="export inference model with fixed input shape:[w,h]")
+        help="export inference model with fixed input shape:[w,h] or [n,3,w,h]")
     parser.add_argument(
     parser.add_argument(
         "--split_dataset",
         "--split_dataset",
         "-sd",
         "-sd",
@@ -136,15 +136,21 @@ def main():
 
 
         fixed_input_shape = None
         fixed_input_shape = None
         if args.fixed_input_shape is not None:
         if args.fixed_input_shape is not None:
-            fixed_input_shape = eval(args.fixed_input_shape)
-            assert len(
-                fixed_input_shape
-            ) == 2, "len of fixed input shape must == 2, such as [224,224]"
+            fixed_input_shape = list(eval(args.fixed_input_shape))
+            assert len(fixed_input_shape) in [
+                2, 4
+            ], "fixed_input_shape must be a list/tuple with length 2 or 4, such as [224,224] or [1,3,224,224]"
+            if len(fixed_input_shape) == 4:
+                assert fixed_input_shape[
+                    1] == 3, "input channel in fixed_input_shape must be 3, but received is {}".format(
+                        fixed_input_shape[1])
+            assert fixed_input_shape[-2] > 0 and fixed_input_shape[
+                -1] > 0, "input width and height must be a positive integer, but received is {}".format(
+                    fixed_input_shape[-2:])
+
             # input fixed_input_shape is [w,h]
             # input fixed_input_shape is [w,h]
             # export_inference_model needs [h,w]
             # export_inference_model needs [h,w]
-            fixed_input_shape = fixed_input_shape[-1::-1]
-        else:
-            fixed_input_shape = [-1, -1]
+            fixed_input_shape[-2:] = fixed_input_shape[-1:-3:-1]
 
 
         os.environ['PADDLEX_EXPORT_STAGE'] = 'True'
         os.environ['PADDLEX_EXPORT_STAGE'] = 'True'
         os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
         os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'

+ 2 - 2
dygraph/paddlex/cv/models/base.py

@@ -479,10 +479,10 @@ class BaseModel:
         logging.info("Model is ready for quantization-aware training.")
         logging.info("Model is ready for quantization-aware training.")
         self.status = 'Quantized'
         self.status = 'Quantized'
 
 
-    def _export_inference_model(self, save_dir, image_shape=[-1, -1]):
+    def _export_inference_model(self, save_dir, image_shape=None):
         save_dir = osp.join(save_dir, 'inference_model')
         save_dir = osp.join(save_dir, 'inference_model')
         self.net.eval()
         self.net.eval()
-        self.test_inputs = self.get_test_inputs(image_shape)
+        self.test_inputs = self._get_test_inputs(image_shape)
 
 
         if self.status == 'Quantized':
         if self.status == 'Quantized':
             self.quantizer.save_quantized_model(self.net,
             self.quantizer.save_quantized_model(self.net,

+ 45 - 18
dygraph/paddlex/cv/models/classifier.py

@@ -24,6 +24,7 @@ from paddle.static import InputSpec
 from paddlex.utils import logging, TrainingStats, DisablePrint
 from paddlex.utils import logging, TrainingStats, DisablePrint
 from paddlex.cv.models.base import BaseModel
 from paddlex.cv.models.base import BaseModel
 from paddlex.cv.transforms import arrange_transforms
 from paddlex.cv.transforms import arrange_transforms
+from paddlex.cv.transforms.operators import Resize
 
 
 with DisablePrint():
 with DisablePrint():
     from PaddleClas.ppcls.modeling import architectures
     from PaddleClas.ppcls.modeling import architectures
@@ -52,7 +53,8 @@ class BaseClassifier(BaseModel):
     def __init__(self, model_name='ResNet50', num_classes=1000, **params):
     def __init__(self, model_name='ResNet50', num_classes=1000, **params):
         self.init_params = locals()
         self.init_params = locals()
         self.init_params.update(params)
         self.init_params.update(params)
-        del self.init_params['params']
+        if 'lr_mult_list' in self.init_params:
+            del self.init_params['lr_mult_list']
         super(BaseClassifier, self).__init__('classifier')
         super(BaseClassifier, self).__init__('classifier')
         if not hasattr(architectures, model_name):
         if not hasattr(architectures, model_name):
             raise Exception("ERROR: There's no model named {}.".format(
             raise Exception("ERROR: There's no model named {}.".format(
@@ -71,10 +73,22 @@ class BaseClassifier(BaseModel):
                 class_dim=self.num_classes, **params)
                 class_dim=self.num_classes, **params)
         return net
         return net
 
 
-    def get_test_inputs(self, image_shape):
+    def _fix_transforms_shape(self, image_shape):
+        if hasattr(self, 'test_transforms'):
+            if self.test_transforms is not None:
+                self.test_transforms.transforms.append(
+                    Resize(target_size=image_shape))
+
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+            self._fix_transforms_shape(image_shape[-2:])
+        else:
+            image_shape = [None, 3, -1, -1]
         input_spec = [
         input_spec = [
             InputSpec(
             InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32')
+                shape=image_shape, name='image', dtype='float32')
         ]
         ]
         return input_spec
         return input_spec
 
 
@@ -513,16 +527,21 @@ class AlexNet(BaseClassifier):
         super(AlexNet, self).__init__(
         super(AlexNet, self).__init__(
             model_name='AlexNet', num_classes=num_classes)
             model_name='AlexNet', num_classes=num_classes)
 
 
-    def get_test_inputs(self, image_shape):
-        if image_shape == [-1, -1]:
-            image_shape = [224, 224]
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+        else:
+            image_shape = [None, 3, 224, 224]
             logging.info('When exporting inference model for {},'.format(
             logging.info('When exporting inference model for {},'.format(
                 self.__class__.__name__
                 self.__class__.__name__
-            ) + ' if image_shape is [-1, -1], it will be forcibly set to [224, 224]'
+            ) + ' if fixed_input_shape is not set, it will be forcibly set to [None, 3, 224, 224]'
                          )
                          )
+        self._fix_transforms_shape(image_shape[-2:])
+
         input_spec = [
         input_spec = [
             InputSpec(
             InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32')
+                shape=image_shape, name='image', dtype='float32')
         ]
         ]
         return input_spec
         return input_spec
 
 
@@ -713,16 +732,20 @@ class ShuffleNetV2(BaseClassifier):
         super(ShuffleNetV2, self).__init__(
         super(ShuffleNetV2, self).__init__(
             model_name=model_name, num_classes=num_classes)
             model_name=model_name, num_classes=num_classes)
 
 
-    def get_test_inputs(self, image_shape):
-        if image_shape == [-1, -1]:
-            image_shape = [224, 224]
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+        else:
+            image_shape = [None, 3, 224, 224]
             logging.info('When exporting inference model for {},'.format(
             logging.info('When exporting inference model for {},'.format(
                 self.__class__.__name__
                 self.__class__.__name__
-            ) + ' if image_shape is [-1, -1], it will be forcibly set to [224, 224]'
+            ) + ' if fixed_input_shape is not set, it will be forcibly set to [None, 3, 224, 224]'
                          )
                          )
+        self._fix_transforms_shape(image_shape[-2:])
         input_spec = [
         input_spec = [
             InputSpec(
             InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32')
+                shape=image_shape, name='image', dtype='float32')
         ]
         ]
         return input_spec
         return input_spec
 
 
@@ -732,15 +755,19 @@ class ShuffleNetV2_swish(BaseClassifier):
         super(ShuffleNetV2_swish, self).__init__(
         super(ShuffleNetV2_swish, self).__init__(
             model_name='ShuffleNetV2_x1_5', num_classes=num_classes)
             model_name='ShuffleNetV2_x1_5', num_classes=num_classes)
 
 
-    def get_test_inputs(self, image_shape):
-        if image_shape == [-1, -1]:
-            image_shape = [224, 224]
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+        else:
+            image_shape = [None, 3, 224, 224]
             logging.info('When exporting inference model for {},'.format(
             logging.info('When exporting inference model for {},'.format(
                 self.__class__.__name__
                 self.__class__.__name__
-            ) + ' if image_shape is [-1, -1], it will be forcibly set to [224, 224]'
+            ) + ' if fixed_input_shape is not set, it will be forcibly set to [None, 3, 224, 224]'
                          )
                          )
+        self._fix_transforms_shape(image_shape[-2:])
         input_spec = [
         input_spec = [
             InputSpec(
             InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32')
+                shape=image_shape, name='image', dtype='float32')
         ]
         ]
         return input_spec
         return input_spec

+ 103 - 6
dygraph/paddlex/cv/models/detector.py

@@ -26,7 +26,7 @@ import ppdet
 from ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner
 from ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner
 import paddlex
 import paddlex
 import paddlex.utils.logging as logging
 import paddlex.utils.logging as logging
-from paddlex.cv.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH
+from paddlex.cv.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
 from paddlex.cv.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, _BatchPadding, _Gt2YoloTarget
 from paddlex.cv.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, _BatchPadding, _Gt2YoloTarget
 from paddlex.cv.transforms import arrange_transforms
 from paddlex.cv.transforms import arrange_transforms
 from .base import BaseModel
 from .base import BaseModel
@@ -42,7 +42,6 @@ __all__ = [
 class BaseDetector(BaseModel):
 class BaseDetector(BaseModel):
     def __init__(self, model_name, num_classes=80, **params):
     def __init__(self, model_name, num_classes=80, **params):
         self.init_params.update(locals())
         self.init_params.update(locals())
-        del self.init_params['params']
         super(BaseDetector, self).__init__('detector')
         super(BaseDetector, self).__init__('detector')
         if not hasattr(ppdet.modeling, model_name):
         if not hasattr(ppdet.modeling, model_name):
             raise Exception("ERROR: There's no model named {}.".format(
             raise Exception("ERROR: There's no model named {}.".format(
@@ -58,15 +57,32 @@ class BaseDetector(BaseModel):
             net = ppdet.modeling.__dict__[self.model_name](**params)
             net = ppdet.modeling.__dict__[self.model_name](**params)
         return net
         return net
 
 
-    def get_test_inputs(self, image_shape):
+    def _fix_transforms_shape(self, image_shape):
+        raise NotImplementedError("_fix_transforms_shape: not implemented!")
+
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+            if image_shape[-2] % 32 > 0 or image_shape[-1] % 32 > 0:
+                raise Exception(
+                    "Height and width in fixed_input_shape must be a multiple of 32, but received is {}.".
+                    format(image_shape[-2:]))
+            self._fix_transforms_shape(image_shape[-2:])
+        else:
+            image_shape = [None, 3, -1, -1]
+
         input_spec = [{
         input_spec = [{
             "image": InputSpec(
             "image": InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32'),
+                shape=image_shape, name='image', dtype='float32'),
             "im_shape": InputSpec(
             "im_shape": InputSpec(
-                shape=[None, 2], name='im_shape', dtype='float32'),
+                shape=[image_shape[0], 2], name='im_shape', dtype='float32'),
             "scale_factor": InputSpec(
             "scale_factor": InputSpec(
-                shape=[None, 2], name='scale_factor', dtype='float32')
+                shape=[image_shape[0], 2],
+                name='scale_factor',
+                dtype='float32')
         }]
         }]
+
         return input_spec
         return input_spec
 
 
     def _get_backbone(self, backbone_name, **params):
     def _get_backbone(self, backbone_name, **params):
@@ -669,6 +685,29 @@ class YOLOv3(BaseDetector):
 
 
         return batch_transforms
         return batch_transforms
 
 
+    def _fix_transforms_shape(self, image_shape):
+        if hasattr(self, 'test_transforms'):
+            if self.test_transforms is not None:
+                has_resize_op = False
+                resize_op_idx = -1
+                normalize_op_idx = len(self.test_transforms.transforms)
+                for idx, op in enumerate(self.test_transforms.transforms):
+                    name = op.__class__.__name__
+                    if name == 'Resize':
+                        has_resize_op = True
+                        resize_op_idx = idx
+                    if name == 'Normalize':
+                        normalize_op_idx = idx
+
+                if not has_resize_op:
+                    self.test_transforms.transforms.insert(
+                        normalize_op_idx,
+                        Resize(
+                            target_size=image_shape, interp='CUBIC'))
+                else:
+                    self.test_transforms.transforms[
+                        resize_op_idx].target_size = image_shape
+
 
 
 class FasterRCNN(BaseDetector):
 class FasterRCNN(BaseDetector):
     def __init__(self,
     def __init__(self,
@@ -914,6 +953,35 @@ class FasterRCNN(BaseDetector):
 
 
         return batch_transforms
         return batch_transforms
 
 
+    def _fix_transforms_shape(self, image_shape):
+        if hasattr(self, 'test_transforms'):
+            if self.test_transforms is not None:
+                has_resize_op = False
+                resize_op_idx = -1
+                normalize_op_idx = len(self.test_transforms.transforms)
+                for idx, op in enumerate(self.test_transforms.transforms):
+                    name = op.__class__.__name__
+                    if name == 'ResizeByShort':
+                        has_resize_op = True
+                        resize_op_idx = idx
+                    if name == 'Normalize':
+                        normalize_op_idx = idx
+
+                if not has_resize_op:
+                    self.test_transforms.transforms.insert(
+                        normalize_op_idx,
+                        Resize(
+                            target_size=image_shape,
+                            keep_ratio=True,
+                            interp='CUBIC'))
+                else:
+                    self.test_transforms.transforms[resize_op_idx] = Resize(
+                        target_size=image_shape,
+                        keep_ratio=True,
+                        interp='CUBIC')
+                self.test_transforms.transforms.append(
+                    Padding(im_padding_value=[0., 0., 0.]))
+
 
 
 class PPYOLO(YOLOv3):
 class PPYOLO(YOLOv3):
     def __init__(self,
     def __init__(self,
@@ -1553,3 +1621,32 @@ class MaskRCNN(BaseDetector):
             collate_batch=collate_batch)
             collate_batch=collate_batch)
 
 
         return batch_transforms
         return batch_transforms
+
+    def _fix_transforms_shape(self, image_shape):
+        if hasattr(self, 'test_transforms'):
+            if self.test_transforms is not None:
+                has_resize_op = False
+                resize_op_idx = -1
+                normalize_op_idx = len(self.test_transforms.transforms)
+                for idx, op in enumerate(self.test_transforms.transforms):
+                    name = op.__class__.__name__
+                    if name == 'ResizeByShort':
+                        has_resize_op = True
+                        resize_op_idx = idx
+                    if name == 'Normalize':
+                        normalize_op_idx = idx
+
+                if not has_resize_op:
+                    self.test_transforms.transforms.insert(
+                        normalize_op_idx,
+                        Resize(
+                            target_size=image_shape,
+                            keep_ratio=True,
+                            interp='CUBIC'))
+                else:
+                    self.test_transforms.transforms[resize_op_idx] = Resize(
+                        target_size=image_shape,
+                        keep_ratio=True,
+                        interp='CUBIC')
+                self.test_transforms.transforms.append(
+                    Padding(im_padding_value=[0., 0., 0.]))

+ 26 - 6
dygraph/paddlex/cv/models/load_model.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # limitations under the License.
 
 
 import os.path as osp
 import os.path as osp
-
+import numpy as np
 import yaml
 import yaml
 import paddle
 import paddle
 import paddleslim
 import paddleslim
@@ -22,6 +22,29 @@ import paddlex.utils.logging as logging
 from paddlex.cv.transforms import build_transforms
 from paddlex.cv.transforms import build_transforms
 
 
 
 
+def load_rcnn_inference_model(model_dir):
+    paddle.enable_static()
+    exe = paddle.static.Executor(paddle.CPUPlace())
+    path_prefix = osp.join(model_dir, "model")
+    prog, _, _ = paddle.static.load_inference_model(path_prefix, exe)
+    paddle.disable_static()
+    extra_var_info = paddle.load(osp.join(model_dir, "model.pdiparams.info"))
+
+    net_state_dict = dict()
+    static_state_dict = dict()
+
+    for name, var in prog.state_dict().items():
+        static_state_dict[name] = np.array(var)
+    for var_name in static_state_dict:
+        if var_name not in extra_var_info:
+            continue
+        structured_name = extra_var_info[var_name].get('structured_name', None)
+        if structured_name is None:
+            continue
+        net_state_dict[structured_name] = static_state_dict[var_name]
+    return net_state_dict
+
+
 def load_model(model_dir):
 def load_model(model_dir):
     """
     """
     Load saved model from a given directory.
     Load saved model from a given directory.
@@ -43,7 +66,7 @@ def load_model(model_dir):
     if int(version.split('.')[0]) < 2:
     if int(version.split('.')[0]) < 2:
         raise Exception(
         raise Exception(
             'Current version is {}, a model trained by PaddleX={} cannot be load.'.
             'Current version is {}, a model trained by PaddleX={} cannot be load.'.
-            format(paddlex.version, version))
+            format(paddlex.__version__, version))
 
 
     status = model_info['status']
     status = model_info['status']
 
 
@@ -90,10 +113,7 @@ def load_model(model_dir):
 
 
         if status == 'Infer':
         if status == 'Infer':
             if model_info['Model'] in ['FasterRCNN', 'MaskRCNN']:
             if model_info['Model'] in ['FasterRCNN', 'MaskRCNN']:
-                net_state_dict = paddle.load(
-                    model_dir,
-                    params_filename='model.pdiparams',
-                    model_filename='model.pdmodel')
+                net_state_dict = load_rcnn_inference_model(model_dir)
             else:
             else:
                 net_state_dict = paddle.load(osp.join(model_dir, 'model'))
                 net_state_dict = paddle.load(osp.join(model_dir, 'model'))
         else:
         else:

+ 30 - 3
dygraph/paddlex/cv/models/segmenter.py

@@ -27,7 +27,7 @@ import paddlex.utils.logging as logging
 from .base import BaseModel
 from .base import BaseModel
 from .utils import seg_metrics as metrics
 from .utils import seg_metrics as metrics
 from paddlex.utils.checkpoint import seg_pretrain_weights_dict
 from paddlex.utils.checkpoint import seg_pretrain_weights_dict
-from paddlex.cv.transforms import Decode
+from paddlex.cv.transforms import Decode, Resize
 
 
 __all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2"]
 __all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2"]
 
 
@@ -58,10 +58,37 @@ class BaseSegmenter(BaseModel):
             num_classes=self.num_classes, **params)
             num_classes=self.num_classes, **params)
         return net
         return net
 
 
-    def get_test_inputs(self, image_shape):
+    def _fix_transforms_shape(self, image_shape):
+        if hasattr(self, 'test_transforms'):
+            if self.test_transforms is not None:
+                has_resize_op = False
+                resize_op_idx = -1
+                normalize_op_idx = len(self.test_transforms.transforms)
+                for idx, op in enumerate(self.test_transforms.transforms):
+                    name = op.__class__.__name__
+                    if name == 'Normalize':
+                        normalize_op_idx = idx
+                    if 'Resize' in name:
+                        has_resize_op = True
+                        resize_op_idx = idx
+
+                if not has_resize_op:
+                    self.test_transforms.transforms.insert(
+                        normalize_op_idx, Resize(target_size=image_shape))
+                else:
+                    self.test_transforms.transforms[resize_op_idx] = Resize(
+                        target_size=image_shape)
+
+    def _get_test_inputs(self, image_shape):
+        if image_shape is not None:
+            if len(image_shape) == 2:
+                image_shape = [None, 3] + image_shape
+            self._fix_transforms_shape(image_shape[-2:])
+        else:
+            image_shape = [None, 3, -1, -1]
         input_spec = [
         input_spec = [
             InputSpec(
             InputSpec(
-                shape=[None, 3] + image_shape, name='image', dtype='float32')
+                shape=image_shape, name='image', dtype='float32')
         ]
         ]
         return input_spec
         return input_spec
 
 

+ 27 - 18
dygraph/paddlex/cv/transforms/operators.py

@@ -212,13 +212,15 @@ class Resize(Transform):
             Otherwise, target_size represents [target height, target width].
             Otherwise, target_size represents [target height, target width].
         interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
         interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
             Interpolation method of resize. Defaults to 'LINEAR'.
             Interpolation method of resize. Defaults to 'LINEAR'.
+        keep_ratio (bool): Whether to resize width and height by the same scale so that the resized
+            width/height is not greater than the target width/height. Defaults to False.
 
 
     Raises:
     Raises:
         TypeError: Invalid type of target_size.
         TypeError: Invalid type of target_size.
         ValueError: Invalid interpolation method.
         ValueError: Invalid interpolation method.
     """
     """
 
 
-    def __init__(self, target_size, interp='LINEAR'):
+    def __init__(self, target_size, interp='LINEAR', keep_ratio=False):
         super(Resize, self).__init__()
         super(Resize, self).__init__()
         if not (interp == "RANDOM" or interp in interp_dict):
         if not (interp == "RANDOM" or interp in interp_dict):
             raise ValueError("interp should be one of {}".format(
             raise ValueError("interp should be one of {}".format(
@@ -234,25 +236,22 @@ class Resize(Transform):
         # (height, width)
         # (height, width)
         self.target_size = target_size
         self.target_size = target_size
         self.interp = interp
         self.interp = interp
+        self.keep_ratio = keep_ratio
 
 
-    def apply_im(self, image, interp):
-        image = cv2.resize(
-            image, (self.target_size[1], self.target_size[0]),
-            interpolation=interp)
+    def apply_im(self, image, interp, target_size):
+        image = cv2.resize(image, target_size, interpolation=interp)
         return image
         return image
 
 
-    def apply_mask(self, mask):
-        mask = cv2.resize(
-            mask, (self.target_size[1], self.target_size[0]),
-            interpolation=cv2.INTER_NEAREST)
+    def apply_mask(self, mask, target_size):
+        mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
         return mask
         return mask
 
 
-    def apply_bbox(self, bbox, scale):
+    def apply_bbox(self, bbox, scale, target_size):
         im_scale_x, im_scale_y = scale
         im_scale_x, im_scale_y = scale
         bbox[:, 0::2] *= im_scale_x
         bbox[:, 0::2] *= im_scale_x
         bbox[:, 1::2] *= im_scale_y
         bbox[:, 1::2] *= im_scale_y
-        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, self.target_size[1])
-        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, self.target_size[0])
+        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, target_size[0])
+        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, target_size[1])
         return bbox
         return bbox
 
 
     def apply_segm(self, segms, im_size, scale):
     def apply_segm(self, segms, im_size, scale):
@@ -281,14 +280,22 @@ class Resize(Transform):
 
 
         im_scale_y = self.target_size[0] / im_h
         im_scale_y = self.target_size[0] / im_h
         im_scale_x = self.target_size[1] / im_w
         im_scale_x = self.target_size[1] / im_w
+        target_size = (self.target_size[1], self.target_size[0])
+        if self.keep_ratio:
+            scale = min(im_scale_y, im_scale_x)
+            target_w = int(round(im_w * scale))
+            target_h = int(round(im_h * scale))
+            target_size = (target_w, target_h)
+            im_scale_y = target_h / im_h
+            im_scale_x = target_w / im_w
 
 
-        sample['image'] = self.apply_im(sample['image'], interp)
+        sample['image'] = self.apply_im(sample['image'], interp, target_size)
 
 
         if 'mask' in sample:
         if 'mask' in sample:
-            sample['mask'] = self.apply_mask(sample['mask'])
+            sample['mask'] = self.apply_mask(sample['mask'], target_size)
         if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
         if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
-            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'],
-                                                [im_scale_x, im_scale_y])
+            sample['gt_bbox'] = self.apply_bbox(
+                sample['gt_bbox'], [im_scale_x, im_scale_y], target_size)
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
         if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
             sample['gt_poly'] = self.apply_segm(
             sample['gt_poly'] = self.apply_segm(
                 sample['gt_poly'], [im_h, im_w], [im_scale_x, im_scale_y])
                 sample['gt_poly'], [im_h, im_w], [im_scale_x, im_scale_y])
@@ -913,7 +920,8 @@ class Padding(Transform):
                  pad_mode=0,
                  pad_mode=0,
                  offsets=None,
                  offsets=None,
                  im_padding_value=(127.5, 127.5, 127.5),
                  im_padding_value=(127.5, 127.5, 127.5),
-                 label_padding_value=255):
+                 label_padding_value=255,
+                 size_divisor=32):
         """
         """
         Pad image to a specified size or multiple of size_divisor.
         Pad image to a specified size or multiple of size_divisor.
 
 
@@ -923,6 +931,7 @@ class Padding(Transform):
                 if 0, only pad to right and bottom. If 1, pad according to center. If 2, only pad left and top. Defaults to 0.
                 if 0, only pad to right and bottom. If 1, pad according to center. If 2, only pad left and top. Defaults to 0.
             im_padding_value(Sequence[float]): RGB value of pad area. Defaults to (127.5, 127.5, 127.5).
             im_padding_value(Sequence[float]): RGB value of pad area. Defaults to (127.5, 127.5, 127.5).
             label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
             label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
+            size_divisor(int): Image width and height after padding are a multiple of size_divisor. Defaults to 32.
         """
         """
         super(Padding, self).__init__()
         super(Padding, self).__init__()
         if isinstance(target_size, (list, tuple)):
         if isinstance(target_size, (list, tuple)):
@@ -940,7 +949,7 @@ class Padding(Transform):
             assert offsets, 'if pad_mode is -1, offsets should not be None'
             assert offsets, 'if pad_mode is -1, offsets should not be None'
 
 
         self.target_size = target_size
         self.target_size = target_size
-        self.size_divisor = 32
+        self.size_divisor = size_divisor
         self.pad_mode = pad_mode
         self.pad_mode = pad_mode
         self.offsets = offsets
         self.offsets = offsets
         self.im_padding_value = im_padding_value
         self.im_padding_value = im_padding_value