Kaynağa Gözat

Merge pull request #106 from PaddlePaddle/develop_jason

Develop jason
Jason 5 yıl önce
ebeveyn
işleme
c254838962

+ 1 - 1
docs/apis/transforms/augment.md

@@ -10,7 +10,7 @@ PaddleX对于图像分类、目标检测、实例分割和语义分割内置了
 | :------- | :------------|
 | 图像分类 | [RandomCrop](cls_transforms.html#randomcrop)、[RandomHorizontalFlip](cls_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](cls_transforms.html#randomverticalflip)、 <br> [RandomRotate](cls_transforms.html#randomrotate)、 [RandomDistort](cls_transforms.html#randomdistort) |
 |目标检测<br>实例分割| [RandomHorizontalFlip](det_transforms.html#randomhorizontalflip)、[RandomDistort](det_transforms.html#randomdistort)、[RandomCrop](det_transforms.html#randomcrop)、<br> [MixupImage](det_transforms.html#mixupimage)(仅支持YOLOv3模型)、[RandomExpand](det_transforms.html#randomexpand) |
-|语义分割  | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、<br> [RandomRotation](seg_transforms.html#randomrotation)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) |
+|语义分割  | [RandomHorizontalFlip](seg_transforms.html#randomhorizontalflip)、[RandomVerticalFlip](seg_transforms.html#randomverticalflip)、[RandomRangeScaling](seg_transforms.html#randomrangescaling)、<br> [RandomStepScaling](seg_transforms.html#randomstepscaling)、[RandomPaddingCrop](seg_transforms.html#randompaddingcrop)、 [RandomBlur](seg_transforms.html#randomblur)、<br> [RandomRotate](seg_transforms.html#randomrotate)、[RandomScaleAspect](seg_transforms.html#randomscaleaspect)、[RandomDistort](seg_transforms.html#randomdistort) |
 
 ## imgaug增强库的支持
 

+ 1 - 1
docs/apis/transforms/seg_transforms.md

@@ -120,7 +120,7 @@ paddlex.seg.transforms.RandomBlur(prob=0.1)
 * **prob** (float): 图像模糊概率。默认为0.1。
 
 
-## RandomRotation
+## RandomRotate
 ```python
 paddlex.seg.transforms.RandomRotate(rotate_range=15, im_padding_value=[127.5, 127.5, 127.5], label_padding_value=255)
 ```

+ 1 - 1
paddlex/__init__.py

@@ -53,4 +53,4 @@ log_level = 2
 
 from . import interpret
 
-__version__ = '1.0.2.github'
+__version__ = '1.0.4'

+ 4 - 4
paddlex/cv/datasets/dataset.py

@@ -209,8 +209,8 @@ def GenerateMiniBatch(batch_data):
     padding_batch = []
     for data in batch_data:
         im_c, im_h, im_w = data[0].shape[:]
-        padding_im = np.zeros((im_c, max_shape[1], max_shape[2]),
-                              dtype=np.float32)
+        padding_im = np.zeros(
+            (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
         padding_im[:, :im_h, :im_w] = data[0]
         padding_batch.append((padding_im, ) + data[1:])
     return padding_batch
@@ -226,8 +226,8 @@ class Dataset:
         if num_workers == 'auto':
             import multiprocessing as mp
             num_workers = mp.cpu_count() // 2 if mp.cpu_count() // 2 < 8 else 8
-        if platform.platform().startswith(
-                "Darwin") or platform.platform().startswith("Windows"):
+        if platform.platform().startswith("Darwin") or platform.platform(
+        ).startswith("Windows"):
             parallel_method = 'thread'
         if transforms is None:
             raise Exception("transform should be defined.")

+ 20 - 19
paddlex/cv/models/deeplabv3p.py

@@ -190,11 +190,6 @@ class DeepLabv3p(BaseAPI):
         if mode == 'train':
             self.optimizer.minimize(model_out)
             outputs['loss'] = model_out
-        elif mode == 'eval':
-            outputs['loss'] = model_out[0]
-            outputs['pred'] = model_out[1]
-            outputs['label'] = model_out[2]
-            outputs['mask'] = model_out[3]
         else:
             outputs['pred'] = model_out[0]
             outputs['logit'] = model_out[1]
@@ -336,18 +331,26 @@ class DeepLabv3p(BaseAPI):
         for step, data in tqdm.tqdm(
                 enumerate(data_generator()), total=total_steps):
             images = np.array([d[0] for d in data])
-            labels = np.array([d[1] for d in data])
+
+            _, _, im_h, im_w = images.shape
+            labels = list()
+            for d in data:
+                padding_label = np.zeros(
+                    (1, im_h, im_w)).astype('int64') + self.ignore_index
+                padding_label[:, :im_h, :im_w] = d[1]
+                labels.append(padding_label)
+            labels = np.array(labels)
+
             num_samples = images.shape[0]
             if num_samples < batch_size:
                 num_pad_samples = batch_size - num_samples
                 pad_images = np.tile(images[0:1], (num_pad_samples, 1, 1, 1))
                 images = np.concatenate([images, pad_images])
             feed_data = {'image': images}
-            outputs = self.exe.run(
-                self.parallel_test_prog,
-                feed=feed_data,
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=True)
+            outputs = self.exe.run(self.parallel_test_prog,
+                                   feed=feed_data,
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=True)
             pred = outputs[0]
             if num_samples < batch_size:
                 pred = pred[0:num_samples]
@@ -364,8 +367,7 @@ class DeepLabv3p(BaseAPI):
 
         metrics = OrderedDict(
             zip(['miou', 'category_iou', 'macc', 'category_acc', 'kappa'],
-                [miou, category_iou, macc, category_acc,
-                 conf_mat.kappa()]))
+                [miou, category_iou, macc, category_acc, conf_mat.kappa()]))
         if return_details:
             eval_details = {
                 'confusion_matrix': conf_mat.confusion_matrix.tolist()
@@ -394,10 +396,9 @@ class DeepLabv3p(BaseAPI):
                 transforms=self.test_transforms, mode='test')
             im, im_info = self.test_transforms(im_file)
         im = np.expand_dims(im, axis=0)
-        result = self.exe.run(
-            self.test_prog,
-            feed={'image': im},
-            fetch_list=list(self.test_outputs.values()))
+        result = self.exe.run(self.test_prog,
+                              feed={'image': im},
+                              fetch_list=list(self.test_outputs.values()))
         pred = result[0]
         pred = np.squeeze(pred).astype('uint8')
         logit = result[1]
@@ -413,6 +414,6 @@ class DeepLabv3p(BaseAPI):
                 pred = pred[0:h, 0:w]
                 logit = logit[0:h, 0:w, :]
             else:
-                raise Exception("Unexpected info '{}' in im_info".format(
-                    info[0]))
+                raise Exception("Unexpected info '{}' in im_info".format(info[
+                    0]))
         return {'label_map': pred, 'score_map': logit}

+ 8 - 8
paddlex/cv/nets/segmentation/deeplabv3p.py

@@ -135,7 +135,8 @@ class DeepLabv3p(object):
         param_attr = fluid.ParamAttr(
             name=name_scope + 'weights',
             regularizer=None,
-            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
+            initializer=fluid.initializer.TruncatedNormal(
+                loc=0.0, scale=0.06))
         with scope('encoder'):
             channel = 256
             with scope("image_pool"):
@@ -151,8 +152,8 @@ class DeepLabv3p(object):
                         padding=0,
                         param_attr=param_attr))
                 input_shape = fluid.layers.shape(input)
-                image_avg = fluid.layers.resize_bilinear(
-                    image_avg, input_shape[2:])
+                image_avg = fluid.layers.resize_bilinear(image_avg,
+                                                         input_shape[2:])
 
             with scope("aspp0"):
                 aspp0 = bn_relu(
@@ -244,7 +245,8 @@ class DeepLabv3p(object):
         param_attr = fluid.ParamAttr(
             name=name_scope + 'weights',
             regularizer=None,
-            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06))
+            initializer=fluid.initializer.TruncatedNormal(
+                loc=0.0, scale=0.06))
         with scope('decoder'):
             with scope('concat'):
                 decode_shortcut = bn_relu(
@@ -326,9 +328,6 @@ class DeepLabv3p(object):
         if self.mode == 'train':
             inputs['label'] = fluid.data(
                 dtype='int32', shape=[None, 1, None, None], name='label')
-        elif self.mode == 'eval':
-            inputs['label'] = fluid.data(
-                dtype='int32', shape=[None, 1, None, None], name='label')
         return inputs
 
     def build_net(self, inputs):
@@ -351,7 +350,8 @@ class DeepLabv3p(object):
             name=name_scope + 'weights',
             regularizer=fluid.regularizer.L2DecayRegularizer(
                 regularization_coeff=0.0),
-            initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01))
+            initializer=fluid.initializer.TruncatedNormal(
+                loc=0.0, scale=0.01))
         with scope('logit'):
             with fluid.name_scope('last_conv'):
                 logit = conv(

+ 59 - 0
paddlex/cv/transforms/cls_transforms.py

@@ -92,6 +92,12 @@ class Compose(ClsTransform):
                     outputs = (im, label)
         return outputs
 
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already stored in ``self.transforms``.
+
+        Raises:
+            Exception: If ``augmenters`` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # ComposedClsTransforms hands a plain list to Compose.__init__, so
+        # ``self.transforms`` is expected to be that list (TODO confirm
+        # against Compose.__init__); a list has no nested ``.transforms``
+        # attribute, so concatenate with it directly.
+        self.transforms = augmenters + self.transforms
+
 
 class RandomCrop(ClsTransform):
     """对图像进行随机剪裁,模型训练时的数据增强操作。
@@ -461,3 +467,56 @@ class ArrangeClassifier(ClsTransform):
         else:
             outputs = (im, )
         return outputs
+
+
+class ComposedClsTransforms(Compose):
+    """Default preprocessing pipeline for classification models.
+
+    Training stage:
+        1. Randomly crop a sub-image and resize it to ``crop_size``.
+        2. Horizontally flip the result with probability 0.5.
+        3. Normalize the image.
+    Evaluation / prediction stage:
+        1. Resize proportionally so the short side is
+           ``int(crop_size * 1.14)``.
+        2. Crop a ``crop_size`` image from the center.
+        3. Normalize the image.
+
+    Args:
+        mode (str): Processing stage. 'train' selects the training
+            pipeline; any other value (e.g. 'eval', 'test') selects the
+            evaluation / prediction pipeline.
+        crop_size (int|list|tuple): Size of the image fed to the model. A
+            pair must hold two equal values.
+        mean (list): Image mean passed to ``Normalize``.
+        std (list): Image standard deviation passed to ``Normalize``.
+
+    Raises:
+        Exception: If the two values of ``crop_size`` differ, or if the
+            side length is not a multiple of 32.
+    """
+
+    def __init__(self,
+                 mode,
+                 crop_size=[224, 224],
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = crop_size
+        # A size pair may arrive as a list or a tuple; both are reduced to a
+        # single side length once the image is confirmed to be square.
+        if isinstance(crop_size, (list, tuple)):
+            if crop_size[0] != crop_size[1]:
+                raise Exception(
+                    "In classifier model, width and height should be equal, please modify your parameter `crop_size`"
+                )
+            width = crop_size[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In classifier model, width and height should be multiple of 32, e.g 224、256、320...., please modify your parameter `crop_size`"
+            )
+
+        if mode == 'train':
+            # Training pipeline, including data augmentation.
+            transforms = [
+                RandomCrop(crop_size=width),
+                RandomHorizontalFlip(prob=0.5),
+                Normalize(mean=mean, std=std),
+            ]
+        else:
+            # Evaluation / prediction pipeline.
+            transforms = [
+                ResizeByShort(short_size=int(width * 1.14)),
+                CenterCrop(crop_size=width),
+                Normalize(mean=mean, std=std),
+            ]
+
+        super(ComposedClsTransforms, self).__init__(transforms)

+ 111 - 0
paddlex/cv/transforms/det_transforms.py

@@ -152,6 +152,12 @@ class Compose(DetTransform):
                     outputs = (im, im_info)
         return outputs
 
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already stored in ``self.transforms``.
+
+        Raises:
+            Exception: If ``augmenters`` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # ComposedRCNNTransforms / ComposedYOLOTransforms hand a plain list
+        # to Compose.__init__, so ``self.transforms`` is expected to be that
+        # list (TODO confirm against Compose.__init__); a list has no nested
+        # ``.transforms`` attribute, so concatenate with it directly.
+        self.transforms = augmenters + self.transforms
+
 
 class ResizeByShort(DetTransform):
     """根据图像的短边调整图像大小(resize)。
@@ -1227,3 +1233,108 @@ class ArrangeYOLOv3(DetTransform):
             im_shape = im_info['image_shape']
             outputs = (im, im_shape)
         return outputs
+
+
+class ComposedRCNNTransforms(Compose):
+    """Preprocessing pipeline for RCNN models (faster-rcnn / mask-rcnn).
+
+    Training stage:
+        1. Horizontally flip the image with probability 0.5.
+        2. Normalize the image.
+        3. Resize proportionally, with the scale computed as
+            scale = min_max_size[0] / short_size_of_image
+            if max_size_of_image * scale > min_max_size[1]:
+                scale = min_max_size[1] / max_size_of_image
+        4. Pad the result of step 3 so that width and height are multiples
+           of 32.
+    Evaluation / prediction stage:
+        The same as training, without the random flip.
+
+    Args:
+        mode (str): Processing stage. 'train' selects the training
+            pipeline; any other value (e.g. 'eval', 'test') selects the
+            evaluation / prediction pipeline.
+        min_max_size (list): Constraints on the shortest and longest side
+            when the image is resized.
+        mean (list): Image mean passed to ``Normalize``.
+        std (list): Image standard deviation passed to ``Normalize``.
+    """
+
+    def __init__(self,
+                 mode,
+                 min_max_size=[800, 1333],
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        pipeline = []
+        if mode == 'train':
+            # Random flipping is the only training-time augmentation.
+            pipeline.append(RandomHorizontalFlip(prob=0.5))
+
+        # Steps shared by the training and evaluation / prediction stages.
+        short_side, long_side = min_max_size[0], min_max_size[1]
+        pipeline.append(Normalize(mean=mean, std=std))
+        pipeline.append(
+            ResizeByShort(short_size=short_side, max_size=long_side))
+        pipeline.append(Padding(coarsest_stride=32))
+
+        super(ComposedRCNNTransforms, self).__init__(pipeline)
+
+
+class ComposedYOLOTransforms(Compose):
+    """Preprocessing pipeline for the YOLOv3 model.
+
+    Training stage:
+        1. Apply the MixupImage strategy during the first ``mixup_epoch``
+           epochs, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#mixupimage
+        2. Randomly distort the image (brightness, contrast, saturation
+           and hue).
+        3. Randomly expand the image, see https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/det_transforms.html#randomexpand
+        4. Randomly crop the image.
+        5. Resize the output of step 4 to the size given by ``shape``.
+        6. Horizontally flip the image with probability 0.5.
+        7. Normalize the image.
+    Evaluation / prediction stage:
+        1. Resize the image to the size given by ``shape``.
+        2. Normalize the image.
+
+    Args:
+        mode (str): Processing stage. 'train' selects the training
+            pipeline; any other value (e.g. 'eval', 'test') selects the
+            evaluation / prediction pipeline.
+        shape (int|list|tuple): Size of the image fed to the model. A pair
+            must hold two equal values.
+        mixup_epoch (int): Number of leading training epochs that use the
+            mixup strategy.
+        mean (list): Image mean passed to ``Normalize``.
+        std (list): Image standard deviation passed to ``Normalize``.
+
+    Raises:
+        Exception: If the two values of ``shape`` differ, or if the side
+            length is not a multiple of 32.
+    """
+
+    def __init__(self,
+                 mode,
+                 shape=[608, 608],
+                 mixup_epoch=250,
+                 mean=[0.485, 0.456, 0.406],
+                 std=[0.229, 0.224, 0.225]):
+        width = shape
+        # A size pair may arrive as a list or a tuple; both are reduced to a
+        # single side length once the image is confirmed to be square.
+        if isinstance(shape, (list, tuple)):
+            if shape[0] != shape[1]:
+                raise Exception(
+                    "In YOLOv3 model, width and height should be equal")
+            width = shape[0]
+        if width % 32 != 0:
+            raise Exception(
+                "In YOLOv3 model, width and height should be multiple of 32, e.g 224、256、320...."
+            )
+
+        if mode == 'train':
+            # Training pipeline, including data augmentation.
+            transforms = [
+                MixupImage(mixup_epoch=mixup_epoch),
+                RandomDistort(),
+                RandomExpand(),
+                RandomCrop(),
+                Resize(target_size=width, interp='RANDOM'),
+                RandomHorizontalFlip(),
+                Normalize(mean=mean, std=std),
+            ]
+        else:
+            # Evaluation / prediction pipeline.
+            transforms = [
+                Resize(target_size=width, interp='CUBIC'),
+                Normalize(mean=mean, std=std),
+            ]
+        super(ComposedYOLOTransforms, self).__init__(transforms)

+ 42 - 0
paddlex/cv/transforms/seg_transforms.py

@@ -108,6 +108,12 @@ class Compose(SegTransform):
                     outputs = (im, im_info)
         return outputs
 
+    def add_augmenters(self, augmenters):
+        """Prepend extra augmentation operators to this pipeline.
+
+        Args:
+            augmenters (list): Transform operators to run before the
+                operators already stored in ``self.transforms``.
+
+        Raises:
+            Exception: If ``augmenters`` is not a list.
+        """
+        if not isinstance(augmenters, list):
+            raise Exception(
+                "augmenters should be list type in func add_augmenters()")
+        # ComposedSegTransforms hands a plain list to Compose.__init__, so
+        # ``self.transforms`` is expected to be that list (TODO confirm
+        # against Compose.__init__); a list has no nested ``.transforms``
+        # attribute, so concatenate with it directly.
+        self.transforms = augmenters + self.transforms
+
 
 class RandomHorizontalFlip(SegTransform):
     """以一定的概率对图像进行水平翻转。当存在标注图像时,则同步进行翻转。
@@ -1088,3 +1094,39 @@ class ArrangeSegmenter(SegTransform):
             return (im, im_info)
         else:
             return (im, )
+
+
+class ComposedSegTransforms(Compose):
+    """Preprocessing pipeline for semantic segmentation models
+    (UNet / DeepLabv3p).
+
+    Training stage:
+        1. Horizontally flip the image with probability 0.5.
+        2. Randomly rescale the image (``ResizeStepScaling``).
+        3. Randomly crop a ``train_crop_size`` sub-image; a crop smaller
+           than ``train_crop_size`` is padded up to that size.
+        4. Normalize the image.
+    Evaluation / prediction stage:
+        1. Resize the image with ``Resize(512)``.
+        2. Normalize the image.
+
+    Args:
+        mode (str): Processing stage. 'train' selects the training
+            pipeline; any other value (e.g. 'eval', 'test') selects the
+            evaluation / prediction pipeline.
+        train_crop_size (list): Size of the random crop taken during
+            training.
+        mean (list): Image mean passed to ``Normalize``.
+        std (list): Image standard deviation passed to ``Normalize``.
+    """
+
+    def __init__(self,
+                 mode,
+                 train_crop_size=[769, 769],
+                 mean=[0.5, 0.5, 0.5],
+                 std=[0.5, 0.5, 0.5]):
+        if mode == 'train':
+            # Training-only steps, including data augmentation.
+            pipeline = [
+                RandomHorizontalFlip(prob=0.5),
+                ResizeStepScaling(),
+                RandomPaddingCrop(crop_size=train_crop_size),
+            ]
+        else:
+            # Evaluation / prediction only resizes before normalizing.
+            pipeline = [Resize(512)]
+
+        # Normalization closes the pipeline in every stage.
+        pipeline.append(Normalize(mean=mean, std=std))
+
+        super(ComposedSegTransforms, self).__init__(pipeline)

+ 0 - 2
paddlex/deploy.py

@@ -97,8 +97,6 @@ class Predictor:
             config.disable_glog_info()
         if memory_optimize:
             config.enable_memory_optim()
-        else:
-            config.diable_memory_optim()
 
         # 开启计算图分析优化,包括OP融合等
         config.switch_ir_optim(True)

+ 1 - 1
setup.py

@@ -19,7 +19,7 @@ long_description = "PaddleX. A end-to-end deeplearning model development toolkit
 
 setuptools.setup(
     name="paddlex",
-    version='1.0.2',
+    version='1.0.4',
     author="paddlex",
     author_email="paddlex@baidu.com",
     description=long_description,