
Merge pull request #208 from PaddlePaddle/develop_del_doc

update code from doc branch, solve conflicts
Jason 5 years ago
parent
commit
c61b7bcd58
32 changed files with 706 additions and 126 deletions
  1. + 48 - 0     paddlex/command.py
  2. + 2 - 1      paddlex/cv/datasets/dataset.py
  3. + 3 - 0      paddlex/cv/datasets/easydata_cls.py
  4. + 3 - 0      paddlex/cv/datasets/easydata_det.py
  5. + 3 - 0      paddlex/cv/datasets/easydata_seg.py
  6. + 2 - 0      paddlex/cv/datasets/imagenet.py
  7. + 3 - 1      paddlex/cv/datasets/seg_dataset.py
  8. + 3 - 0      paddlex/cv/datasets/voc.py
  9. + 17 - 21    paddlex/cv/models/utils/visualize.py
  10. + 32 - 21   paddlex/cv/transforms/seg_transforms.py
  11. + 1 - 0     paddlex/tools/base.py
  12. + 26 - 0    paddlex/tools/convert.py
  13. + 110 - 2   paddlex/tools/x2coco.py
  14. + 29 - 6    paddlex/tools/x2imagenet.py
  15. + 1 - 0     paddlex/utils/__init__.py
  16. + 14 - 5    paddlex/utils/utils.py
  17. + 12 - 22   tutorials/train/image_classification/alexnet.py
  18. + 2 - 7     tutorials/train/image_classification/mobilenetv2.py
  19. + 46 - 0    tutorials/train/image_classification/mobilenetv3_small_ssld.py
  20. + 46 - 0    tutorials/train/image_classification/resnet50_vd_ssld.py
  21. + 46 - 0    tutorials/train/image_classification/shufflenetv2.py
  22. + 54 - 0    tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py
  23. + 7 - 7     tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py
  24. + 55 - 0    tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py
  25. + 6 - 10    tutorials/train/object_detection/faster_rcnn_r50_fpn.py
  26. + 4 - 5     tutorials/train/object_detection/yolov3_darknet53.py
  27. + 55 - 0    tutorials/train/object_detection/yolov3_mobilenetv1.py
  28. + 55 - 0    tutorials/train/object_detection/yolov3_mobilenetv3.py
  29. + 7 - 7     tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py
  30. + 9 - 3     tutorials/train/semantic_segmentation/fast_scnn.py
  31. + 2 - 2     tutorials/train/semantic_segmentation/hrnet.py
  32. + 3 - 6     tutorials/train/semantic_segmentation/unet.py

+ 48 - 0
paddlex/command.py

@@ -51,6 +51,36 @@ def arg_parser():
         default=False,
         help="export onnx model for deployment")
     parser.add_argument(
+        "--data_conversion",
+        "-dc",
+        action="store_true",
+        default=False,
+        help="convert the dataset to the standard format")
+    parser.add_argument(
+        "--source",
+        "-se",
+        type=_text_type,
+        default=None,
+        help="define dataset format before the conversion")
+    parser.add_argument(
+        "--to",
+        "-to",
+        type=_text_type,
+        default=None,
+        help="define dataset format after the conversion")
+    parser.add_argument(
+        "--pics",
+        "-p",
+        type=_text_type,
+        default=None,
+        help="define pictures directory path")
+    parser.add_argument(
+        "--annotations",
+        "-a",
+        type=_text_type,
+        default=None,
+        help="define annotations directory path")
+    parser.add_argument(
         "--fixed_input_shape",
         "-fs",
         default=None,
@@ -105,6 +135,24 @@ def main():
                 "paddlex --export_inference --model_dir model_path --save_dir infer_model"
             )
         pdx.convertor.export_onnx_model(model, args.save_dir)
+        
+    if args.data_conversion:
+        assert args.source is not None, "--source should be defined while converting dataset"
+        assert args.to is not None, "--to should be defined to confirm the target dataset format"
+        assert args.pics is not None, "--pics should be defined to confirm the pictures path"
+        assert args.annotations is not None, "--annotations should be defined to confirm the annotations path"
+        assert args.save_dir is not None, "--save_dir should be defined to store the target dataset"
+        if args.source == 'labelme' and args.to == 'ImageNet':
+            logging.error(
+                "The labelme dataset can not convert to the ImageNet dataset.",
+                exit=False)
+        if args.source == 'jingling' and args.to == 'PascalVOC':
+            logging.error(
+                "The jingling dataset can not convert to the PascalVOC dataset.",
+                exit=False)
+        pdx.tools.convert.dataset_conversion(args.source, args.to, 
+                                             args.pics, args.annotations, args.save_dir)
+        
 
 
 if __name__ == "__main__":

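A minimal usage sketch of the new conversion entry point added above (the dataset paths here are placeholders, not from the PR):

    import paddlex as pdx

    # Convert a LabelMe-annotated detection dataset to the MSCOCO format.
    # 'MyDataset/pics' and 'MyDataset/annotations' are hypothetical paths.
    pdx.tools.convert.dataset_conversion('labelme', 'MSCOCO',
                                         'MyDataset/pics',
                                         'MyDataset/annotations',
                                         'MyDataset_coco')

    # Equivalent CLI, using the flags registered in command.py above:
    #   paddlex --data_conversion --source labelme --to MSCOCO \
    #           --pics MyDataset/pics --annotations MyDataset/annotations \
    #           --save_dir MyDataset_coco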
+ 2 - 1
paddlex/cv/datasets/dataset.py

@@ -46,7 +46,7 @@ def is_valid(sample):
                 return False
             elif isinstance(s, np.ndarray) and s.size == 0:
                 return False
-            elif isinstance(s, collections.Sequence) and len(s) == 0:
+            elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
                 return False
     return True
 
@@ -55,6 +55,7 @@ def get_encoding(path):
     f = open(path, 'rb')
     data = f.read()
     file_encoding = chardet.detect(data).get('encoding')
+    f.close()
     return file_encoding
 
 

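The added f.close() plugs a leaked file handle. An equivalent sketch with a context manager, which also closes the handle if read() raises:

    import chardet

    def get_encoding(path):
        # 'with' guarantees the handle is closed, even on exceptions.
        with open(path, 'rb') as f:
            data = f.read()
        return chardet.detect(data).get('encoding')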
+ 3 - 0
paddlex/cv/datasets/easydata_cls.py

@@ -18,6 +18,7 @@ import random
 import copy
 import json
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .imagenet import ImageNet
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):

+ 3 - 0
paddlex/cv/datasets/easydata_det.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .voc import VOCDetection
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):

+ 3 - 0
paddlex/cv/datasets/easydata_seg.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic
@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):

+ 2 - 0
paddlex/cv/datasets/imagenet.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -66,6 +67,7 @@ class ImageNet(Dataset):
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
                 if not is_pic(items[0]):
                     continue
                 full_path = osp.join(data_dir, items[0])

+ 3 - 1
paddlex/cv/datasets/seg_dataset.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic
@@ -61,10 +62,11 @@ class SegDataset(Dataset):
                 for line in f:
                     item = line.strip()
                     self.labels.append(item)
-
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
+                items[1] = path_normalization(items[1])
                 if not is_pic(items[0]):
                     continue
                 full_path_im = osp.join(data_dir, items[0])

+ 3 - 0
paddlex/cv/datasets/voc.py

@@ -22,6 +22,7 @@ import numpy as np
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding
@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
                     break
                 img_file, xml_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                xml_file = path_normalization(xml_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(xml_file):

+ 17 - 21
paddlex/cv/models/utils/visualize.py

@@ -1,11 +1,11 @@
 # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -28,7 +28,7 @@ def visualize_detection(image, result, threshold=0.5, save_dir='./'):
     """
 
     if isinstance(image, np.ndarray):
-        image_name = str(int(time.time())) + '.jpg'
+        image_name = str(int(time.time() * 1000)) + '.jpg'
     else:
         image_name = os.path.split(image)[-1]
         image = cv2.imread(image)
@@ -64,7 +64,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./'):
 
     if isinstance(image, np.ndarray):
         im = image
-        image_name = str(int(time.time())) + '.jpg'
+        image_name = str(int(time.time() * 1000)) + '.jpg'
     else:
         image_name = os.path.split(image)[-1]
         im = cv2.imread(image)
@@ -145,8 +145,8 @@ def draw_bbox_mask(image, results, threshold=0.5):
         assert brightness_factor >= -1.0 and brightness_factor <= 1.0
         color = mplc.to_rgb(color)
         polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color))
-        modified_lightness = polygon_color[1] + (
-            brightness_factor * polygon_color[1])
+        modified_lightness = polygon_color[1] + (brightness_factor *
+                                                 polygon_color[1])
         modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness
         modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness
         modified_color = colorsys.hls_to_rgb(
@@ -161,8 +161,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
     dpi = fig.get_dpi()
     fig.set_size_inches(
         (width * scale + 1e-2) / dpi,
-        (height * scale + 1e-2) / dpi,
-    )
+        (height * scale + 1e-2) / dpi, )
     canvas = FigureCanvasAgg(fig)
     ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
     ax.axis("off")
@@ -208,8 +207,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
                 edgecolor=color,
                 linewidth=linewidth * scale,
                 alpha=0.8,
-                linestyle="-",
-            ))
+                linestyle="-", ))
 
         # draw mask
         if 'mask' in dt:
@@ -232,23 +230,22 @@ def draw_bbox_mask(image, results, threshold=0.5):
                         fill=True,
                         facecolor=mplc.to_rgb(color) + (alpha, ),
                         edgecolor=edge_color,
-                        linewidth=max(default_font_size // 15 * scale, 1),
-                    )
+                        linewidth=max(default_font_size // 15 * scale, 1), )
                     ax.add_patch(polygon)
 
         # draw label
         text_pos = (xmin, ymin)
         horiz_align = "left"
         instance_area = w * h
-        if (instance_area < _SMALL_OBJECT_AREA_THRESH * scale
-                or h < 40 * scale):
+        if (instance_area < _SMALL_OBJECT_AREA_THRESH * scale or
+                h < 40 * scale):
             if ymin >= height - 5:
                 text_pos = (xmin, ymin)
             else:
                 text_pos = (xmin, ymax)
         height_ratio = h / np.sqrt(height * width)
-        font_size = (np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 *
-                     default_font_size)
+        font_size = (np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2,
+                             2) * 0.5 * default_font_size)
         text = "{} {:.2f}".format(cname, score)
         color = np.maximum(list(mplc.to_rgb(color)), 0.2)
         color[np.argmax(color)] = max(0.8, np.max(color))
@@ -269,8 +266,7 @@ def draw_bbox_mask(image, results, threshold=0.5):
             horizontalalignment=horiz_align,
             color=color,
             zorder=10,
-            rotation=0,
-        )
+            rotation=0, )
 
     s, (width, height) = canvas.print_to_buffer()
     buffer = np.frombuffer(s, dtype="uint8")
@@ -408,8 +404,8 @@ def draw_pr_curve(eval_details_file=None,
             plt.plot(x, sr_array, color=color, label=nm, linewidth=1)
         plt.legend(loc="lower left", fontsize=5)
         plt.savefig(
-            os.path.join(save_dir, "./{}_pr_curve(iou-{}).png".format(
-                style, iou_thresh)),
+            os.path.join(save_dir,
+                         "./{}_pr_curve(iou-{}).png".format(style, iou_thresh)),
             dpi=800)
         plt.close()
 

+ 32 - 21
paddlex/cv/transforms/seg_transforms.py

@@ -1102,20 +1102,21 @@ class ArrangeSegmenter(SegTransform):
 class ComposedSegTransforms(Compose):
     """ 语义分割模型(UNet/DeepLabv3p)的图像处理流程,具体如下
         训练阶段:
-        1. 随机对图像以0.5的概率水平翻转
-        2. 按不同的比例随机Resize原图
+        1. 随机对图像以0.5的概率水平翻转,若random_horizontal_flip为False,则跳过此步骤
+        2. 按不同的比例随机Resize原图, 处理方式参考[paddlex.seg.transforms.ResizeRangeScaling](#resizerangescaling)。若min_max_size为None,则跳过此步骤
         3. 从原图中随机crop出大小为train_crop_size大小的子图,如若crop出来的图小于train_crop_size,则会将图padding到对应大小
         4. 图像归一化
-        预测阶段:
-        1. 图像归一化
+       预测阶段:
+        1. 将图像的最长边resize至(min_max_size[0] + min_max_size[1])//2, 短边按比例resize。若min_max_size为None,则跳过此步骤
+        2. 图像归一化
 
         Args:
-            mode(str): 图像处理所处阶段,训练/验证/预测,分别对应'train', 'eval', 'test'
-            min_max_size(list): 训练过程中,图像的最长边会随机resize至此区间(短边按比例相应resize);预测阶段,图像最长边会resize至此区间中间值,即(min_size+max_size)/2。默认为[400, 600]
-            train_crop_size(list): 仅在mode为'train`时生效,训练过程中,随机从图像中裁剪出对应大小的子图(如若原图小于此大小,则会padding到此大小),默认为[400, 600]
-            mean(list): 图像均值
-            std(list): 图像方差
-            random_horizontal_flip(bool): 数据增强方式,仅在mode为`train`时生效,表示训练过程是否随机水平翻转图像,默认为True
+            mode(str): Transforms所处的阶段,包括`train', 'eval'或'test'
+            min_max_size(list): 用于对图像进行resize,具体作用参见上述步骤。
+            train_crop_size(list): 训练过程中随机裁剪原图用于训练,具体作用参见上述步骤。此参数仅在mode为`train`时生效。
+            mean(list): 图像均值, 默认为[0.485, 0.456, 0.406]。
+            std(list): 图像方差,默认为[0.229, 0.224, 0.225]。
+            random_horizontal_flip(bool): 数据增强,是否随机水平翻转图像,此参数仅在mode为`train`时生效。
     """
 
     def __init__(self,
@@ -1127,19 +1128,29 @@ class ComposedSegTransforms(Compose):
                  random_horizontal_flip=True):
         if mode == 'train':
             # Transforms at training time, including data augmentation
-            transforms = [
-                ResizeRangeScaling(
-                    min_value=min(min_max_size), max_value=max(min_max_size)),
-                RandomPaddingCrop(crop_size=train_crop_size), Normalize(
-                    mean=mean, std=std)
-            ]
+            if min_max_size is None:
+                transforms = [
+                    RandomPaddingCrop(crop_size=train_crop_size), Normalize(
+                        mean=mean, std=std)
+                ]
+            else:
+                transforms = [
+                    ResizeRangeScaling(
+                        min_value=min(min_max_size),
+                        max_value=max(min_max_size)),
+                    RandomPaddingCrop(crop_size=train_crop_size), Normalize(
+                        mean=mean, std=std)
+                ]
             if random_horizontal_flip:
                 transforms.insert(0, RandomHorizontalFlip())
         else:
             # Transforms at validation/prediction time
-            long_size = (min(min_max_size) + max(min_max_size)) // 2
-            transforms = [
-                ResizeByLong(long_size=long_size), Normalize(
-                    mean=mean, std=std)
-            ]
+            if min_max_size is None:
+                transforms = [Normalize(mean=mean, std=std)]
+            else:
+                long_size = (min(min_max_size) + max(min_max_size)) // 2
+                transforms = [
+                    ResizeByLong(long_size=long_size), Normalize(
+                        mean=mean, std=std)
+                ]
         super(ComposedSegTransforms, self).__init__(transforms)

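A usage sketch of the new min_max_size=None path (a sketch, assuming the paddlex.seg.transforms module path referenced in the docstring above):

    from paddlex.seg import transforms

    # Training: only RandomHorizontalFlip, RandomPaddingCrop and Normalize run;
    # the ResizeRangeScaling step is skipped because min_max_size is None.
    train_transforms = transforms.ComposedSegTransforms(
        mode='train', min_max_size=None, train_crop_size=[512, 512])

    # Eval/predict: only Normalize runs, no ResizeByLong.
    eval_transforms = transforms.ComposedSegTransforms(
        mode='eval', min_max_size=None)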
+ 1 - 0
paddlex/tools/base.py

@@ -40,4 +40,5 @@ def get_encoding(path):
     f = open(path, 'rb')
     data = f.read()
     file_encoding = chardet.detect(data).get('encoding')
+    f.close()
     return file_encoding

+ 26 - 0
paddlex/tools/convert.py

@@ -15,8 +15,10 @@
 # limitations under the License.
 
 from .x2imagenet import EasyData2ImageNet
+from .x2imagenet import JingLing2ImageNet
 from .x2coco import LabelMe2COCO
 from .x2coco import EasyData2COCO
+from .x2coco import JingLing2COCO
 from .x2voc import LabelMe2VOC
 from .x2voc import EasyData2VOC
 from .x2seg import JingLing2Seg
@@ -24,10 +26,34 @@ from .x2seg import LabelMe2Seg
 from .x2seg import EasyData2Seg
 
 easydata2imagenet = EasyData2ImageNet().convert
+jingling2imagenet = JingLing2ImageNet().convert
 labelme2coco = LabelMe2COCO().convert
 easydata2coco = EasyData2COCO().convert
+jingling2coco = JingLing2COCO().convert
 labelme2voc = LabelMe2VOC().convert
 easydata2voc = EasyData2VOC().convert
 jingling2seg = JingLing2Seg().convert
 labelme2seg = LabelMe2Seg().convert
 easydata2seg = EasyData2Seg().convert
+
+def dataset_conversion(source, to, pics, anns, save_dir):
+    if source == 'labelme' and to == 'PascalVOC':
+        labelme2voc(pics, anns, save_dir)
+    elif source == 'labelme' and to == 'MSCOCO':
+        labelme2coco(pics, anns, save_dir)
+    elif source == 'labelme' and to == 'SEG':
+        labelme2seg(pics, anns, save_dir)
+    elif source == 'jingling' and to == 'ImageNet':
+        jingling2imagenet(pics, anns, save_dir)
+    elif source == 'jingling' and to == 'MSCOCO':
+        jingling2coco(pics, anns, save_dir)
+    elif source == 'jingling' and to == 'SEG':
+        jingling2seg(pics, anns, save_dir)
+    elif source == 'easydata' and to == 'ImageNet':
+        easydata2imagenet(pics, anns, save_dir)
+    elif source == 'easydata' and to == 'PascalVOC':
+        easydata2voc(pics, anns, save_dir)
+    elif source == 'easydata' and to == 'MSCOCO':
+        easydata2coco(pics, anns, save_dir)
+    elif source == 'easydata' and to == 'SEG':
+        easydata2seg(pics, anns, save_dir)

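The if/elif chain above is effectively a lookup table. A minimal alternative sketch (a hypothetical refactor, not part of this PR) that expresses the same (source, to) mapping as a dict, assuming the converter names imported in this module:

    # Hypothetical refactor: the same mapping as a dict dispatch.
    _CONVERTERS = {
        ('labelme', 'PascalVOC'): labelme2voc,
        ('labelme', 'MSCOCO'): labelme2coco,
        ('labelme', 'SEG'): labelme2seg,
        ('jingling', 'ImageNet'): jingling2imagenet,
        ('jingling', 'MSCOCO'): jingling2coco,
        ('jingling', 'SEG'): jingling2seg,
        ('easydata', 'ImageNet'): easydata2imagenet,
        ('easydata', 'PascalVOC'): easydata2voc,
        ('easydata', 'MSCOCO'): easydata2coco,
        ('easydata', 'SEG'): easydata2seg,
    }

    def dataset_conversion(source, to, pics, anns, save_dir):
        _CONVERTERS[(source, to)](pics, anns, save_dir)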
+ 110 - 2
paddlex/tools/x2coco.py

@@ -22,6 +22,7 @@ import shutil
 import numpy as np
 import PIL.ImageDraw
 from .base import MyEncoder, is_pic, get_encoding
+from paddlex.utils import path_normalization
         
         
 class X2COCO(object):
@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
         image["height"] = json_info["imageHeight"]
         image["width"] = json_info["imageWidth"]
         image["id"] = image_id + 1
+        json_info["imagePath"] = path_normalization(json_info["imagePath"])
         image["file_name"] = osp.split(json_info["imagePath"])[-1]
         return image
     
@@ -144,7 +146,7 @@ class LabelMe2COCO(X2COCO):
             img_name_part = osp.splitext(img_file)[0]
             json_file = osp.join(json_dir, img_name_part + ".json")
             if not osp.exists(json_file):
-                os.remove(os.remove(osp.join(image_dir, img_file)))
+                os.remove(osp.join(image_dir, img_file))
                 continue
             image_id = image_id + 1
             with open(json_file, mode='r', \
@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
         image["height"] = img.shape[0]
         image["width"] = img.shape[1]
         image["id"] = image_id + 1
+        img_path = path_normalization(img_path)
         image["file_name"] = osp.split(img_path)[-1]
         return image
     
@@ -216,7 +219,7 @@ class EasyData2COCO(X2COCO):
             img_name_part = osp.splitext(img_file)[0]
             json_file = osp.join(json_dir, img_name_part + ".json")
             if not osp.exists(json_file):
-                os.remove(os.remove(osp.join(image_dir, img_file)))
+                os.remove(osp.join(image_dir, img_file))
                 continue
             image_id = image_id + 1
             with open(json_file, mode='r', \
@@ -255,3 +258,108 @@ class EasyData2COCO(X2COCO):
                         self.annotations_list.append(
                             self.generate_polygon_anns_field(points, segmentation, label, image_id, object_id,
                                                 label_to_num))
+                        
+
+class JingLing2COCO(X2COCO):
+    """将使用EasyData标注的检测或分割数据集转换为COCO数据集。
+    """
+    def __init__(self):
+        super(JingLing2COCO, self).__init__()
+        
+    def generate_images_field(self, json_info, image_id):
+        image = {}
+        image["height"] = json_info["size"]["height"]
+        image["width"] = json_info["size"]["width"]
+        image["id"] = image_id + 1
+        json_info["path"] = path_normalization(json_info["path"])
+        image["file_name"] = osp.split(json_info["path"])[-1]
+        return image
+    
+    def generate_polygon_anns_field(self, height, width, 
+                                    points, label, image_id, 
+                                    object_id, label_to_num):
+        annotation = {}
+        annotation["segmentation"] = [list(np.asarray(points).flatten())]
+        annotation["iscrowd"] = 0
+        annotation["image_id"] = image_id + 1
+        annotation["bbox"] = list(map(float, self.get_bbox(height, width, points)))
+        annotation["area"] = annotation["bbox"][2] * annotation["bbox"][3]
+        annotation["category_id"] = label_to_num[label]
+        annotation["id"] = object_id + 1
+        return annotation
+    
+    def get_bbox(self, height, width, points):
+        polygons = points
+        mask = np.zeros([height, width], dtype=np.uint8)
+        mask = PIL.Image.fromarray(mask)
+        xy = list(map(tuple, polygons))
+        PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
+        mask = np.array(mask, dtype=bool)
+        index = np.argwhere(mask == 1)
+        rows = index[:, 0]
+        clos = index[:, 1]
+        left_top_r = np.min(rows)
+        left_top_c = np.min(clos)
+        right_bottom_r = np.max(rows)
+        right_bottom_c = np.max(clos)
+        return [
+            left_top_c, left_top_r, right_bottom_c - left_top_c,
+            right_bottom_r - left_top_r
+        ]
+        
+    def parse_json(self, img_dir, json_dir):
+        image_id = -1
+        object_id = -1
+        labels_list = []
+        label_to_num = {}
+        for img_file in os.listdir(img_dir):
+            img_name_part = osp.splitext(img_file)[0]
+            json_file = osp.join(json_dir, img_name_part + ".json")
+            if not osp.exists(json_file):
+                os.remove(osp.join(img_dir, img_file))
+                continue
+            image_id = image_id + 1
+            with open(json_file, mode='r', \
+                              encoding=get_encoding(json_file)) as j:
+                json_info = json.load(j)
+                img_info = self.generate_images_field(json_info, image_id)
+                self.images_list.append(img_info)
+                anns_type = "bndbox"
+                for i, obj in enumerate(json_info["outputs"]["object"]):
+                    if i == 0:
+                        if "polygon" in obj:
+                            anns_type = "polygon" 
+                    else:
+                        if anns_type not in obj:
+                            continue
+                    object_id = object_id + 1
+                    label = obj["name"]
+                    if label not in labels_list:
+                        self.categories_list.append(\
+                            self.generate_categories_field(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    if anns_type == "polygon":
+                        points = []
+                        for j in range(int(len(obj["polygon"]) / 2.0)):
+                            points.append([obj["polygon"]["x" + str(j + 1)], 
+                                           obj["polygon"]["y" + str(j + 1)]])
+                        self.annotations_list.append(
+                            self.generate_polygon_anns_field(json_info["size"]["height"], 
+                                                             json_info["size"]["width"], 
+                                                             points, 
+                                                             label, 
+                                                             image_id,
+                                                             object_id, 
+                                                             label_to_num))
+                    if anns_type == "bndbox":
+                        points = []
+                        points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymin"]])
+                        points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymax"]])
+                        points.append([obj["bndbox"]["xmin"], obj["bndbox"]["ymax"]])
+                        points.append([obj["bndbox"]["xmax"], obj["bndbox"]["ymin"]])
+                        self.annotations_list.append(
+                            self.generate_rectangle_anns_field(points, label, image_id,
+                                                  object_id, label_to_num))
+                        
+                        

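The get_bbox helper above derives the box by rasterizing the polygon and taking the extremes of the filled pixels. A self-contained sketch of the same logic (polygon_to_bbox is a hypothetical name) with a quick sanity check:

    import numpy as np
    import PIL.Image
    import PIL.ImageDraw

    def polygon_to_bbox(height, width, points):
        # Rasterize the polygon into a binary mask, then take the min/max
        # row and column of the filled pixels as [x, y, w, h].
        mask = PIL.Image.fromarray(np.zeros([height, width], dtype=np.uint8))
        PIL.ImageDraw.Draw(mask).polygon(
            xy=list(map(tuple, points)), outline=1, fill=1)
        index = np.argwhere(np.array(mask, dtype=bool))
        rows, cols = index[:, 0], index[:, 1]
        return [int(cols.min()), int(rows.min()),
                int(cols.max() - cols.min()), int(rows.max() - rows.min())]

    # A 3x3 square with its top-left corner at (1, 1):
    print(polygon_to_bbox(6, 6, [[1, 1], [4, 1], [4, 4], [1, 4]]))  # [1, 1, 3, 3]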
+ 29 - 6
paddlex/tools/x2imagenet.py

@@ -22,9 +22,8 @@ import shutil
 import numpy as np
 from .base import MyEncoder, is_pic, get_encoding
 
-class EasyData2ImageNet(object):
-    """将使用EasyData标注的分类数据集转换为COCO数据集。
-    """
+
+class X2ImageNet(object):
     def __init__(self):
         pass
     
@@ -46,8 +45,8 @@ class EasyData2ImageNet(object):
                 continue
             with open(json_file, mode="r", \
                               encoding=get_encoding(json_file)) as j:
-                json_info = json.load(j)
-                for output in json_info['labels']:
+                json_info = self.get_json_info(j)
+                for output in json_info:
                     cls_name = output['name']
                     new_image_dir = osp.join(dataset_save_dir, cls_name)
                     if not osp.exists(new_image_dir):
@@ -55,4 +54,28 @@ class EasyData2ImageNet(object):
                     if is_pic(img_name):
                         shutil.copyfile(
                                     osp.join(image_dir, img_name),
-                                    osp.join(new_image_dir, img_name))
+                                    osp.join(new_image_dir, img_name))
+    
+
+class EasyData2ImageNet(X2ImageNet):
+    """将使用EasyData标注的分类数据集转换为ImageNet数据集。
+    """
+    def __init__(self):
+        super(EasyData2ImageNet, self).__init__()
+    
+    def get_json_info(self, json_file):
+        json_info = json.load(json_file)
+        json_info = json_info['labels']
+        return json_info
+                        
+class JingLing2ImageNet(X2ImageNet):
+    """将使用标注精灵标注的分类数据集转换为ImageNet数据集。
+    """
+    def __init__(self):
+        super(JingLing2ImageNet, self).__init__()
+    
+    def get_json_info(self, json_file):
+        json_info = json.load(json_file)
+        json_info = json_info['outputs']['object']
+        return json_info
+    

+ 1 - 0
paddlex/utils/__init__.py

@@ -17,6 +17,7 @@ from . import logging
 from . import utils
 from . import save
 from .utils import seconds_to_hms
+from .utils import path_normalization
 from .download import download
 from .download import decompress
 from .download import download_and_decompress

+ 14 - 5
paddlex/utils/utils.py

@@ -20,6 +20,7 @@ import numpy as np
 import six
 import yaml
 import math
+import platform
 from . import logging
 
 
@@ -49,18 +50,26 @@ def get_environ_info():
                 info['num'] = fluid.core.get_cuda_device_count()
     return info
 
+def path_normalization(path):
+    win_sep = "\\"
+    other_sep = "/"
+    if platform.system() == "Windows":
+        path = win_sep.join(path.split(other_sep))
+    else:
+        path = other_sep.join(path.split(win_sep))
+    return path
 
 def parse_param_file(param_file, return_shape=True):
     from paddle.fluid.proto.framework_pb2 import VarType
     f = open(param_file, 'rb')
-    version = np.fromstring(f.read(4), dtype='int32')
-    lod_level = np.fromstring(f.read(8), dtype='int64')
+    version = np.frombuffer(f.read(4), dtype='int32')
+    lod_level = np.frombuffer(f.read(8), dtype='int64')
     for i in range(int(lod_level)):
-        _size = np.fromstring(f.read(8), dtype='int64')
+        _size = np.frombuffer(f.read(8), dtype='int64')
         _ = f.read(_size)
-    version = np.fromstring(f.read(4), dtype='int32')
+    version = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc = VarType.TensorDesc()
-    tensor_desc_size = np.fromstring(f.read(4), dtype='int32')
+    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
     tensor_shape = tuple(tensor_desc.dims)
     if return_shape:

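path_normalization above rewrites path separators to match the host OS. A brief usage sketch (output shown for a non-Windows host):

    from paddlex.utils import path_normalization

    # Windows-style separators in a file list written on Windows are
    # rewritten to forward slashes on Linux/macOS, and vice versa.
    print(path_normalization("JPEGImages\\img_001.jpg"))  # JPEGImages/img_001.jpg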
+ 12 - 22
tutorials/train/classification/resnet50.py → tutorials/train/image_classification/alexnet.py

@@ -1,8 +1,4 @@
 import os
-# Use GPU card 0
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
-import paddle.fluid as fluid
 from paddlex.cls import transforms
 import paddlex as pdx
 
@@ -11,13 +7,13 @@ veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
 pdx.utils.download_and_decompress(veg_dataset, path='./')
 
 # Define the transforms for training and validation
-train_transforms = transforms.Compose(
-    [transforms.RandomCrop(crop_size=224),
-     transforms.Normalize()])
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
 eval_transforms = transforms.Compose([
     transforms.ResizeByShort(short_size=256),
-    transforms.CenterCrop(crop_size=224),
-    transforms.Normalize()
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
 ])
 
 # Define the datasets for training and validation
@@ -33,26 +29,20 @@ eval_dataset = pdx.datasets.ImageNet(
     label_list='vegetables_cls/labels.txt',
     transforms=eval_transforms)
 
-# PaddleX supports custom-built optimizers
-step_each_epoch = train_dataset.num_samples // 32
-learning_rate = fluid.layers.cosine_decay(
-    learning_rate=0.025, step_each_epoch=step_each_epoch, epochs=10)
-optimizer = fluid.optimizer.Momentum(
-    learning_rate=learning_rate,
-    momentum=0.9,
-    regularization=fluid.regularizer.L2Decay(4e-5))
-
 # Initialize the model and start training
 # Training metrics can be viewed with VisualDL
-# Launch VisualDL with: visualdl --logdir output/resnet50/vdl_log --port 8001
+# Launch VisualDL with: visualdl --logdir output/alexnet/vdl_log --port 8001
 # Then open https://0.0.0.0:8001 in a browser
 # 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
-model = pdx.cls.ResNet50(num_classes=len(train_dataset.labels))
+model = pdx.cls.AlexNet(num_classes=len(train_dataset.labels))
+# AlexNet requires a fixed input_shape to be specified
+model.fixed_input_shape = [224, 224]
 model.train(
     num_epochs=10,
     train_dataset=train_dataset,
     train_batch_size=32,
     eval_dataset=eval_dataset,
-    optimizer=optimizer,
-    save_dir='output/resnet50',
+    lr_decay_epochs=[4, 6, 8],
+    learning_rate=0.0025,
+    save_dir='output/alexnet',
     use_vdl=True)

+ 2 - 7
tutorials/train/classification/mobilenetv2.py → tutorials/train/image_classification/mobilenetv2.py

@@ -1,7 +1,4 @@
 import os
-# Use GPU card 0
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
 from paddlex.cls import transforms
 import paddlex as pdx
 
@@ -11,14 +8,12 @@ pdx.utils.download_and_decompress(veg_dataset, path='./')
 
 # Define the transforms for training and validation
 train_transforms = transforms.Compose([
-    transforms.RandomCrop(crop_size=224),
-    transforms.RandomHorizontalFlip(),
+    transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
     transforms.Normalize()
 ])
 eval_transforms = transforms.Compose([
     transforms.ResizeByShort(short_size=256),
-    transforms.CenterCrop(crop_size=224),
-    transforms.Normalize()
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
 ])
 
 # Define the datasets for training and validation

+ 46 - 0
tutorials/train/image_classification/mobilenetv3_small_ssld.py

@@ -0,0 +1,46 @@
+import os
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and extract the vegetable classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/mobilenetv3_small_ssld/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+model = pdx.cls.MobileNetV3_small_ssld(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    lr_decay_epochs=[4, 6, 8],
+    learning_rate=0.025,
+    save_dir='output/mobilenetv3_small_ssld',
+    use_vdl=True)

+ 46 - 0
tutorials/train/image_classification/resnet50_vd_ssld.py

@@ -0,0 +1,46 @@
+import os
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and extract the vegetable classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/resnet50_vd_ssld/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+model = pdx.cls.ResNet50_vd_ssld(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    lr_decay_epochs=[4, 6, 8],
+    learning_rate=0.025,
+    save_dir='output/resnet50_vd_ssld',
+    use_vdl=True)

+ 46 - 0
tutorials/train/image_classification/shufflenetv2.py

@@ -0,0 +1,46 @@
+import os
+from paddlex.cls import transforms
+import paddlex as pdx
+
+# Download and extract the vegetable classification dataset
+veg_dataset = 'https://bj.bcebos.com/paddlex/datasets/vegetables_cls.tar.gz'
+pdx.utils.download_and_decompress(veg_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.RandomCrop(crop_size=224), transforms.RandomHorizontalFlip(),
+    transforms.Normalize()
+])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByShort(short_size=256),
+    transforms.CenterCrop(crop_size=224), transforms.Normalize()
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/train_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.ImageNet(
+    data_dir='vegetables_cls',
+    file_list='vegetables_cls/val_list.txt',
+    label_list='vegetables_cls/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/shufflenetv2/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+model = pdx.cls.ShuffleNetV2(num_classes=len(train_dataset.labels))
+model.train(
+    num_epochs=10,
+    train_dataset=train_dataset,
+    train_batch_size=32,
+    eval_dataset=eval_dataset,
+    lr_decay_epochs=[4, 6, 8],
+    learning_rate=0.025,
+    save_dir='output/shufflenetv2',
+    use_vdl=True)

+ 54 - 0
tutorials/train/instance_segmentation/mask_rcnn_hrnet_fpn.py

@@ -0,0 +1,54 @@
+import os
+# Use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and extract the Xiaoduxiong sorting dataset
+xiaoduxiong_dataset = 'https://bj.bcebos.com/paddlex/datasets/xiaoduxiong_ins_det.tar.gz'
+pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(), transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+    transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32),
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/train.json',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.CocoDetection(
+    data_dir='xiaoduxiong_ins_det/JPEGImages',
+    ann_file='xiaoduxiong_ins_det/val.json',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/mask_rcnn_hrnet_fpn/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+# num_classes must be the number of classes including background, i.e. number of target classes + 1
+num_classes = len(train_dataset.labels) + 1
+model = pdx.det.MaskRCNN(num_classes=num_classes, backbone='HRNet_W18')
+model.train(
+    num_epochs=12,
+    train_dataset=train_dataset,
+    train_batch_size=1,
+    eval_dataset=eval_dataset,
+    learning_rate=0.00125,
+    warmup_steps=10,
+    lr_decay_epochs=[8, 11],
+    save_dir='output/mask_rcnn_hrnet_fpn',
+    use_vdl=True)

+ 7 - 7
tutorials/train/detection/mask_rcnn_r50_fpn.py → tutorials/train/instance_segmentation/mask_rcnn_r50_fpn.py

@@ -11,16 +11,16 @@ pdx.utils.download_and_decompress(xiaoduxiong_dataset, path='./')
 
 # Define the transforms for training and validation
 train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(),
-    transforms.Normalize(),
-    transforms.ResizeByShort(short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32)
+    transforms.RandomHorizontalFlip(), transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
 ])
 
 eval_transforms = transforms.Compose([
     transforms.Normalize(),
-    transforms.ResizeByShort(short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32)
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32),
 ])
 
 # Define the datasets for training and validation
@@ -41,7 +41,7 @@ eval_dataset = pdx.datasets.CocoDetection(
 # 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
 # num_classes must be the number of classes including background, i.e. number of target classes + 1
 num_classes = len(train_dataset.labels) + 1
-model = pdx.det.MaskRCNN(num_classes=num_classes)
+model = pdx.det.MaskRCNN(num_classes=num_classes, backbone='ResNet50_vd')
 model.train(
     num_epochs=12,
     train_dataset=train_dataset,

+ 55 - 0
tutorials/train/object_detection/faster_rcnn_hrnet_fpn.py

@@ -0,0 +1,55 @@
+import os
+# Use GPU card 0
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and extract the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(), transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
+])
+
+eval_transforms = transforms.Compose([
+    transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333),
+    transforms.Padding(coarsest_stride=32),
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/faster_rcnn_hrnet_fpn/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+# num_classes must be the number of classes including background, i.e. number of target classes + 1
+num_classes = len(train_dataset.labels) + 1
+model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='HRNet_W18')
+model.train(
+    num_epochs=12,
+    train_dataset=train_dataset,
+    train_batch_size=2,
+    eval_dataset=eval_dataset,
+    learning_rate=0.0025,
+    lr_decay_epochs=[8, 11],
+    save_dir='output/faster_rcnn_hrnet_fpn',
+    use_vdl=True)

+ 6 - 10
tutorials/train/detection/faster_rcnn_r50_fpn.py → tutorials/train/object_detection/faster_rcnn_r50_fpn.py

@@ -1,7 +1,4 @@
 import os
-# Use GPU card 0
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
 from paddlex.det import transforms
 import paddlex as pdx
 
@@ -11,18 +8,17 @@ pdx.utils.download_and_decompress(insect_dataset, path='./')
 
 # Define the transforms for training and validation
 train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(),
-    transforms.Normalize(),
-    transforms.ResizeByShort(short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32)
+    transforms.RandomHorizontalFlip(), transforms.Normalize(),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
 ])
 
 eval_transforms = transforms.Compose([
     transforms.Normalize(),
-    transforms.ResizeByShort(short_size=800, max_size=1333),
+    transforms.ResizeByShort(
+        short_size=800, max_size=1333),
     transforms.Padding(coarsest_stride=32),
 ])
-
 # Define the datasets for training and validation
 train_dataset = pdx.datasets.VOCDetection(
     data_dir='insect_det',
@@ -43,7 +39,7 @@ eval_dataset = pdx.datasets.VOCDetection(
 # 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
 # num_classes must be the number of classes including background, i.e. number of target classes + 1
 num_classes = len(train_dataset.labels) + 1
-model = pdx.det.FasterRCNN(num_classes=num_classes)
+model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='ResNet50_vd')
 model.train(
     num_epochs=12,
     train_dataset=train_dataset,

+ 4 - 5
tutorials/train/detection/yolov3_darknet53.py → tutorials/train/object_detection/yolov3_darknet53.py

@@ -1,7 +1,4 @@
 import os
-# Use GPU card 0
-os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
 from paddlex.det import transforms
 import paddlex as pdx
 
@@ -15,13 +12,15 @@ train_transforms = transforms.Compose([
     transforms.RandomDistort(),
     transforms.RandomExpand(),
     transforms.RandomCrop(),
-    transforms.Resize(target_size=608, interp='RANDOM'),
+    transforms.Resize(
+        target_size=608, interp='RANDOM'),
     transforms.RandomHorizontalFlip(),
     transforms.Normalize(),
 ])
 
 eval_transforms = transforms.Compose([
-    transforms.Resize(target_size=608, interp='CUBIC'),
+    transforms.Resize(
+        target_size=608, interp='CUBIC'),
     transforms.Normalize(),
 ])
 

+ 55 - 0
tutorials/train/object_detection/yolov3_mobilenetv1.py

@@ -0,0 +1,55 @@
+import os
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and extract the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.MixupImage(mixup_epoch=250),
+    transforms.RandomDistort(),
+    transforms.RandomExpand(),
+    transforms.RandomCrop(),
+    transforms.Resize(
+        target_size=608, interp='RANDOM'),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+])
+
+eval_transforms = transforms.Compose([
+    transforms.Resize(
+        target_size=608, interp='CUBIC'),
+    transforms.Normalize(),
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/yolov3_mobilenetv1/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+num_classes = len(train_dataset.labels)
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='MobileNetV1')
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    learning_rate=0.000125,
+    lr_decay_epochs=[210, 240],
+    save_dir='output/yolov3_mobilenetv1',
+    use_vdl=True)

+ 55 - 0
tutorials/train/object_detection/yolov3_mobilenetv3.py

@@ -0,0 +1,55 @@
+import os
+from paddlex.det import transforms
+import paddlex as pdx
+
+# Download and extract the insect detection dataset
+insect_dataset = 'https://bj.bcebos.com/paddlex/datasets/insect_det.tar.gz'
+pdx.utils.download_and_decompress(insect_dataset, path='./')
+
+# Define the transforms for training and validation
+train_transforms = transforms.Compose([
+    transforms.MixupImage(mixup_epoch=250),
+    transforms.RandomDistort(),
+    transforms.RandomExpand(),
+    transforms.RandomCrop(),
+    transforms.Resize(
+        target_size=608, interp='RANDOM'),
+    transforms.RandomHorizontalFlip(),
+    transforms.Normalize(),
+])
+
+eval_transforms = transforms.Compose([
+    transforms.Resize(
+        target_size=608, interp='CUBIC'),
+    transforms.Normalize(),
+])
+
+# Define the datasets for training and validation
+train_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/train_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=train_transforms,
+    shuffle=True)
+eval_dataset = pdx.datasets.VOCDetection(
+    data_dir='insect_det',
+    file_list='insect_det/val_list.txt',
+    label_list='insect_det/labels.txt',
+    transforms=eval_transforms)
+
+# Initialize the model and start training
+# Training metrics can be viewed with VisualDL
+# Launch VisualDL with: visualdl --logdir output/yolov3_mobilenetv3/vdl_log --port 8001
+# Then open https://0.0.0.0:8001 in a browser
+# 0.0.0.0 is for local access; for a remote service, replace it with that machine's IP
+num_classes = len(train_dataset.labels)
+model = pdx.det.YOLOv3(num_classes=num_classes, backbone='MobileNetV3_large')
+model.train(
+    num_epochs=270,
+    train_dataset=train_dataset,
+    train_batch_size=8,
+    eval_dataset=eval_dataset,
+    learning_rate=0.000125,
+    lr_decay_epochs=[210, 240],
+    save_dir='output/yolov3_mobilenetv3',
+    use_vdl=True)

+ 7 - 7
tutorials/train/segmentation/deeplabv3p.py → tutorials/train/semantic_segmentation/deeplabv3p_mobilenetv2.py

@@ -11,14 +11,14 @@ pdx.utils.download_and_decompress(optic_dataset, path='./')
 
 # Define the transforms for training and validation
 train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(),
-    transforms.Resize(target_size=512),
-    transforms.RandomPaddingCrop(crop_size=500),
-    transforms.Normalize()
+    transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
+    transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
 ])
 
-eval_transforms = transforms.Compose(
-    [transforms.Resize(512), transforms.Normalize()])
+eval_transforms = transforms.Compose([
+    transforms.ResizeByLong(long_size=512), transforms.Padding(target_size=512),
+    transforms.Normalize()
+])
 
 # Define the datasets for training and validation
 train_dataset = pdx.datasets.SegDataset(
@@ -46,5 +46,5 @@ model.train(
     train_batch_size=4,
     eval_dataset=eval_dataset,
     learning_rate=0.01,
-    save_dir='output/deeplab',
+    save_dir='output/deeplabv3p_mobilenetv2',
     use_vdl=True)

+ 9 - 3
tutorials/train/segmentation/fast_scnn.py → tutorials/train/semantic_segmentation/fast_scnn.py

@@ -11,9 +11,15 @@ pdx.utils.download_and_decompress(optic_dataset, path='./')
 
 # Define the transforms for training and validation
 # API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/transforms/seg_transforms.html#composedsegtransforms
-train_transforms = transforms.ComposedSegTransforms(
-    mode='train', train_crop_size=[769, 769])
-eval_transforms = transforms.ComposedSegTransforms(mode='eval')
+train_transforms = transforms.Compose([
+    transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
+    transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
+])
+
+eval_transforms = transforms.Compose([
+    transforms.ResizeByLong(long_size=512), transforms.Padding(target_size=512),
+    transforms.Normalize()
+])
 
 # Define the datasets for training and validation
 # API reference: https://paddlex.readthedocs.io/zh_CN/latest/apis/datasets/semantic_segmentation.html#segdataset

+ 2 - 2
tutorials/train/segmentation/hrnet.py → tutorials/train/semantic_segmentation/hrnet.py

@@ -16,8 +16,8 @@ train_transforms = transforms.Compose([
 ])
 
 eval_transforms = transforms.Compose([
-    transforms.ResizeByLong(long_size=512),
-    transforms.Padding(target_size=512), transforms.Normalize()
+    transforms.ResizeByLong(long_size=512), transforms.Padding(target_size=512),
+    transforms.Normalize()
 ])
 
 # Define the datasets for training and validation

+ 3 - 6
tutorials/train/segmentation/unet.py → tutorials/train/semantic_segmentation/unet.py

@@ -11,15 +11,12 @@ pdx.utils.download_and_decompress(optic_dataset, path='./')
 
 # Define the transforms for training and validation
 train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(),
-    transforms.ResizeRangeScaling(),
-    transforms.RandomPaddingCrop(crop_size=512),
-    transforms.Normalize()
+    transforms.RandomHorizontalFlip(), transforms.ResizeRangeScaling(),
+    transforms.RandomPaddingCrop(crop_size=512), transforms.Normalize()
 ])
 
 eval_transforms = transforms.Compose([
-    transforms.ResizeByLong(long_size=512),
-    transforms.Padding(target_size=512),
+    transforms.ResizeByLong(long_size=512), transforms.Padding(target_size=512),
     transforms.Normalize()
 ])