
support quality testing

FlyingQianMM 5 years ago
parent
commit
fb6c35ee52

+ 0 - 8
paddlex/cv/datasets/dataset.py

@@ -205,14 +205,6 @@ def generate_minibatch(batch_data, label_padding_value=255, mapper=None):
             batch_data = op(batch_data)
     # if batch_size is 1, do not pad the image
     if len(batch_data) == 1:
-        #im = np.load('/home/luoqianhui/PaddleDetection/image.npy')
-        #im_info = np.load('/home/luoqianhui/PaddleDetection/im_info.npy')
-        #box = np.load('/home/luoqianhui/PaddleDetection/gt_bbox.npy')
-        #id = np.load('/home/luoqianhui/PaddleDetection/gt_class.npy')
-        #diff = np.load('/home/luoqianhui/PaddleDetection/difficult.npy')
-        #im_shape = np.array([1920,2560,1], dtype=np.float32)
-        #batch_data = [(im, im_info, box, im_shape, id, diff)]
-        #batch_data = [(im, im_info, box, id, diff)]
         return batch_data
     width = [data[0].shape[2] for data in batch_data]
     height = [data[0].shape[1] for data in batch_data]

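The hunk above only strips leftover debug code from generate_minibatch; the surviving logic pads every image in a batch up to the batch's maximum height and width so the samples can be stacked. A toy sketch of that padding idea (the shapes are illustrative assumptions):

import numpy as np

# two CHW images of different sizes, standing in for decoded samples
ims = [np.zeros((3, 300, 400), 'float32'), np.zeros((3, 320, 380), 'float32')]
max_h = max(im.shape[1] for im in ims)
max_w = max(im.shape[2] for im in ims)
padded = [
    np.pad(im, ((0, 0), (0, max_h - im.shape[1]), (0, max_w - im.shape[2])))
    for im in ims
]
batch = np.stack(padded)  # shape (2, 3, 320, 400)
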
+ 240 - 13
paddlex/cv/datasets/voc.py

@@ -19,6 +19,8 @@ import os.path as osp
 import random
 import re
 import numpy as np
+import cv2
+import json
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
 import paddlex.utils.logging as logging
@@ -70,22 +72,24 @@ class VOCDetection(Dataset):
         annotations['categories'] = []
         annotations['annotations'] = []
 
-        cname2cid = OrderedDict()
+        self.cname2cid = OrderedDict()
+        self.cid2cname = OrderedDict()
         label_id = 1
         with open(label_list, 'r', encoding=get_encoding(label_list)) as fr:
             for line in fr.readlines():
-                cname2cid[line.strip()] = label_id
+                self.cname2cid[line.strip()] = label_id
+                self.cid2cname[label_id] = line.strip()
                 label_id += 1
                 self.labels.append(line.strip())
         logging.info("Starting to read file list from dataset...")
-        for k, v in cname2cid.items():
+        for k, v in self.cname2cid.items():
             annotations['categories'].append({
                 'supercategory': 'component',
                 'id': v,
                 'name': k
             })
         ct = 0
-        ann_ct = 0
+        self.ann_ct = 0
         with open(file_list, 'r', encoding=get_encoding(file_list)) as fr:
             while True:
                 line = fr.readline()
@@ -104,10 +108,12 @@ class VOCDetection(Dataset):
                 if not osp.isfile(xml_file):
                     continue
                 if not osp.exists(img_file):
-                    #raise IOError('The image file {} is not exist!'.format(
-                    #    img_file))
+                    logging.warning('The image file {} does not exist!'.format(
+                        img_file))
                     continue
                 if not osp.exists(xml_file):
+                    logging.warning('The annotation file {} does not exist!'.
+                                    format(xml_file))
                     continue
                 tree = ET.parse(xml_file)
                 if tree.find('id') is None:
@@ -144,9 +150,7 @@ class VOCDetection(Dataset):
                     name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
                         1:-1]
                     cname = obj.find(name_tag).text.strip()
-                    if cname in ['bu_dao_dian', 'jiao_wei_lou_di']:
-                        cname = 'lou_di'
-                    gt_class[i][0] = cname2cid[cname]
+                    gt_class[i][0] = self.cname2cid[cname]
                     pattern = re.compile('<difficult>', re.IGNORECASE)
                     diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][
                         1:-1]
@@ -187,11 +191,11 @@ class VOCDetection(Dataset):
                         'image_id': int(im_id[0]),
                         'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                         'area': float((x2 - x1 + 1) * (y2 - y1 + 1)),
-                        'category_id': cname2cid[cname],
-                        'id': ann_ct,
+                        'category_id': self.cname2cid[cname],
+                        'id': self.ann_ct,
                         'difficult': _difficult
                     })
-                    ann_ct += 1
+                    self.ann_ct += 1
 
                 im_info = {
                     'im_id': im_id,
@@ -226,6 +230,12 @@ class VOCDetection(Dataset):
         self.coco_gt.createIndex()
 
     def add_negative_samples(self, image_dir):
+        """Add pure background images to the training set.
+
+        Args:
+            image_dir (str): Directory that holds the background images.
+
+        """
         import cv2
         if not osp.exists(image_dir):
             raise Exception("{} background images directory does not exist.".
@@ -245,7 +255,7 @@ class VOCDetection(Dataset):
 
             max_img_id += 1
             im_fname = osp.join(image_dir, image)
-            img_data = cv2.imread(im_fname)
+            img_data = cv2.imread(im_fname, cv2.IMREAD_UNCHANGED)
             im_h, im_w, im_c = img_data.shape
             im_info = {
                 'im_id': np.array([max_img_id]).astype('int32'),
@@ -263,6 +273,162 @@ class VOCDetection(Dataset):
             self.file_list.append([im_fname, coco_rec])
         self.num_samples = len(self.file_list)
 
+    def generate_image(self, templates, background, save_dir='dataset_clone'):
+        """Paste target objects onto a background image to generate a new sample and add it to the dataset.
+
+        Args:
+            templates (list|tuple): Objects from several images can be pasted onto the same
+                background image, so templates is a list whose elements are dicts, each
+                describing the objects of one image. Such a dict has two keys, `image` and
+                `annos`: the value of `image` is the path of the image, or a decoded uint8
+                BGR array laid out as (H, W, C). An image may contain several objects, so
+                the value of `annos` is a list of dicts, each describing one object with
+                the keys `polygon` and `category`: `polygon` holds the boundary coordinates
+                of the object, e.g. [[0, 0], [0, 1], [1, 1], [1, 0]], and `category` holds
+                its class name, e.g. 'dog'.
+            background (dict): The background image may carry ground truth of its own, so
+                background is a dict with the keys `image` and `annos`: the value of `image`
+                is the path of the background image, or a decoded uint8 BGR array laid out
+                as (H, W, C). If the background carries no ground truth, the value of
+                `annos` is an empty list []; otherwise it is a list of dicts, each
+                describing one object with the keys `bbox` and `category`: `bbox` holds the
+                top-left and bottom-right corner coordinates [x1, y1, x2, y2], and
+                `category` holds the class name, e.g. 'dog'.
+            save_dir (str): Directory where the generated images and their annotations are
+                saved. Defaults to 'dataset_clone'.
+        """
+        if not osp.exists(save_dir):
+            os.makedirs(save_dir)
+        image_dir = osp.join(save_dir, 'JPEGImages_clone')
+        anno_dir = osp.join(save_dir, 'Annotations_clone')
+        json_path = osp.join(save_dir, "annotations.json")
+        logging.info("Generated images will be saved in {}".format(image_dir))
+        logging.info(
+            "Annotations of the generated images will be saved as xml files in {}".
+            format(anno_dir))
+        logging.info(
+            "Annotations of all images (those loaded before and those generated now) will be saved as a COCO json file {}".
+            format(json_path))
+        if not osp.exists(image_dir):
+            os.makedirs(image_dir)
+        if not osp.exists(anno_dir):
+            os.makedirs(anno_dir)
+
+        num_objs = len(background['annos'])
+        for temp in templates:
+            num_objs += len(temp['annos'])
+
+        gt_bbox = np.zeros((num_objs, 4), dtype=np.float32)
+        gt_class = np.zeros((num_objs, 1), dtype=np.int32)
+        gt_score = np.ones((num_objs, 1), dtype=np.float32)
+        is_crowd = np.zeros((num_objs, 1), dtype=np.int32)
+        difficult = np.zeros((num_objs, 1), dtype=np.int32)
+        i = -1
+        for i, back_anno in enumerate(background['annos']):
+            gt_bbox[i] = back_anno['bbox']
+            gt_class[i][0] = self.cname2cid[back_anno['category']]
+
+        max_img_id = max(self.coco_gt.getImgIds())
+        max_img_id += 1
+
+        back_im = background['image']
+        if isinstance(back_im, np.ndarray):
+            if len(back_im.shape) != 3:
+                raise Exception(
+                    "The background image should be 3-dimensional, but a {}-dimensional array was received.".
+                    format(len(back_im.shape)))
+        else:
+            try:
+                back_im = cv2.imread(back_im, cv2.IMREAD_UNCHANGED)
+            except:
+                raise TypeError('Cannot read the image file {}!'.format(
+                    back_im))
+        back_annos = background['annos']
+        im_h, im_w, im_c = back_im.shape
+        for temp in templates:
+            temp_im = temp['image']
+            if isinstance(temp_im, np.ndarray):
+                if len(temp_im.shape) != 3:
+                    raise Exception(
+                        "The template image should be 3-dimensional, but a {}-dimensional array was received.".
+                        format(len(temp_im.shape)))
+            else:
+                try:
+                    temp_im = cv2.imread(temp_im, cv2.IMREAD_UNCHANGED)
+                except:
+                    raise TypeError('Cannot read the image file {}!'.format(
+                        temp_im))
+            temp_annos = temp['annos']
+            for temp_anno in temp_annos:
+                temp_mask = np.zeros(temp_im.shape, temp_im.dtype)
+                temp_poly = np.array(temp_anno['polygon'], np.int32)
+                temp_category = temp_anno['category']
+                cv2.fillPoly(temp_mask, [temp_poly], (255, 255, 255))
+                x_list = [temp_poly[i][0] for i in range(len(temp_poly))]
+                y_list = [temp_poly[i][1] for i in range(len(temp_poly))]
+                temp_poly_w = max(x_list) - min(x_list)
+                temp_poly_h = max(y_list) - min(y_list)
+                found = False
+                while not found:
+                    center_x = random.randint(1, im_w - 1)
+                    center_y = random.randint(1, im_h - 1)
+                    if center_x < temp_poly_w / 2 or center_x > im_w - temp_poly_w / 2 - 1 or \
+                       center_y < temp_poly_h / 2 or center_y > im_h - temp_poly_h / 2 - 1:
+                        found = False
+                        continue
+                    found = True
+                    for back_anno in back_annos:
+                        x1, y1, x2, y2 = back_anno['bbox']
+                        # reject a center that falls inside an existing ground-truth box
+                        if x1 < center_x < x2 and y1 < center_y < y2:
+                            found = False
+                            break
+                center = (center_x, center_y)
+                back_im = cv2.seamlessClone(temp_im, back_im, temp_mask,
+                                            center, cv2.MIXED_CLONE)
+                i += 1
+                x1 = center[0] - temp_poly_w / 2
+                x2 = center[0] + temp_poly_w / 2
+                y1 = center[1] - temp_poly_h / 2
+                y2 = center[1] + temp_poly_h / 2
+                gt_bbox[i] = [x1, y1, x2, y2]
+                gt_class[i][0] = self.cname2cid[temp_category]
+                self.ann_ct += 1
+                self.coco_gt.dataset['annotations'].append({
+                    'iscrowd': 0,
+                    'image_id': max_img_id,
+                    'bbox': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
+                    'area': float((x2 - x1 + 1) * (y2 - y1 + 1)),
+                    'category_id': self.cname2cid[temp_category],
+                    'id': self.ann_ct,
+                    'difficult': 0,
+                })
+        im_info = {
+            'im_id': np.array([max_img_id]).astype('int32'),
+            'image_shape': np.array([im_h, im_w]).astype('int32'),
+        }
+        label_info = {
+            'is_crowd': is_crowd,
+            'gt_class': gt_class,
+            'gt_bbox': gt_bbox,
+            'gt_score': gt_score,
+            'difficult': difficult,
+            'gt_poly': [],
+        }
+        self.coco_gt.dataset['images'].append({
+            'height': im_h,
+            'width': im_w,
+            'id': max_img_id,
+            'file_name': "clone_{:06d}.jpg".format(max_img_id)
+        })
+        coco_rec = (im_info, label_info)
+        im_fname = osp.join(image_dir, "clone_{:06d}.jpg".format(max_img_id))
+        cv2.imwrite(im_fname, back_im.astype('uint8'))
+        self._write_xml(im_fname, im_h, im_w, im_c, label_info, anno_dir)
+
+        self.file_list.append([im_fname, coco_rec])
+        self.num_samples = len(self.file_list)
+        self._write_json(self.coco_gt.dataset, save_dir)
+
     def iterator(self):
         self._epoch += 1
         self._pos = 0
@@ -288,3 +454,64 @@ class VOCDetection(Dataset):
             self._pos += 1
             sample = [f[0], im_info, label_info]
             yield sample
+
+    def _write_xml(self, im_fname, im_h, im_w, im_c, label_info, anno_dir):
+        is_crowd = label_info['is_crowd']
+        gt_class = label_info['gt_class']
+        gt_bbox = label_info['gt_bbox']
+        gt_score = label_info['gt_score']
+        gt_poly = label_info['gt_poly']
+        difficult = label_info['difficult']
+        import xml.dom.minidom as minidom
+        xml_doc = minidom.Document()
+        root = xml_doc.createElement("annotation")
+        xml_doc.appendChild(root)
+        node_filename = xml_doc.createElement("filename")
+        node_filename.appendChild(xml_doc.createTextNode(im_fname))
+        root.appendChild(node_filename)
+        node_size = xml_doc.createElement("size")
+        node_width = xml_doc.createElement("width")
+        node_width.appendChild(xml_doc.createTextNode(str(im_w)))
+        node_size.appendChild(node_width)
+        node_height = xml_doc.createElement("height")
+        node_height.appendChild(xml_doc.createTextNode(str(im_h)))
+        node_size.appendChild(node_height)
+        node_depth = xml_doc.createElement("depth")
+        node_depth.appendChild(xml_doc.createTextNode(str(im_c)))
+        node_size.appendChild(node_depth)
+        root.appendChild(node_size)
+        for i in range(label_info['gt_class'].shape[0]):
+            node_obj = xml_doc.createElement("object")
+            node_name = xml_doc.createElement("name")
+            label = self.cid2cname[gt_class[i][0]]
+            node_name.appendChild(xml_doc.createTextNode(label))
+            node_obj.appendChild(node_name)
+            node_diff = xml_doc.createElement("difficult")
+            node_diff.appendChild(xml_doc.createTextNode(str(difficult[i][0])))
+            node_obj.appendChild(node_diff)
+            node_box = xml_doc.createElement("bndbox")
+            node_xmin = xml_doc.createElement("xmin")
+            node_xmin.appendChild(xml_doc.createTextNode(str(gt_bbox[i][0])))
+            node_box.appendChild(node_xmin)
+            node_ymin = xml_doc.createElement("ymin")
+            node_ymin.appendChild(xml_doc.createTextNode(str(gt_bbox[i][1])))
+            node_box.appendChild(node_ymin)
+            node_xmax = xml_doc.createElement("xmax")
+            node_xmax.appendChild(xml_doc.createTextNode(str(gt_bbox[i][2])))
+            node_box.appendChild(node_xmax)
+            node_ymax = xml_doc.createElement("ymax")
+            node_ymax.appendChild(xml_doc.createTextNode(str(gt_bbox[i][3])))
+            node_box.appendChild(node_ymax)
+            node_obj.appendChild(node_box)
+            root.appendChild(node_obj)
+        img_name_part = osp.split(im_fname)[-1].split('.')[0]
+        with open(osp.join(anno_dir, img_name_part + ".xml"), 'w') as fxml:
+            xml_doc.writexml(
+                fxml, indent='\t', addindent='\t', newl='\n', encoding="utf-8")
+
+    def _write_json(self, coco_gt, save_dir):
+        from paddlex.tools.base import MyEncoder
+        json_path = osp.join(save_dir, "annotations.json")
+        with open(json_path, "w") as f:
+            json.dump(coco_gt, f, indent=4, cls=MyEncoder)

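Taken together, the voc.py changes let a VOCDetection dataset augment itself with synthesized samples. A minimal usage sketch, assuming train_dataset is an already constructed paddlex.datasets.VOCDetection instance; the file paths and the 'dog' category are illustrative:

templates = [{
    'image': 'templates/dog_01.jpg',  # or a decoded (H, W, C) uint8 BGR array
    'annos': [{
        'polygon': [[10, 10], [10, 90], [120, 90], [120, 10]],
        'category': 'dog'
    }]
}]
background = {'image': 'backgrounds/grass_001.jpg', 'annos': []}

# add pure background images to training, then paste the templates onto a background
train_dataset.add_negative_samples('backgrounds/')
train_dataset.generate_image(templates, background, save_dir='dataset_clone')
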
+ 12 - 8
paddlex/cv/models/base.py

@@ -34,9 +34,6 @@ from os import path as osp
 from paddle.fluid.framework import Program
 from .utils.pretrain_weights import get_pretrain_weights
 
-#fluid.default_startup_program().random_seed = 1000
-#fluid.default_main_program().random_seed = 1000
-
 
 def dict2str(dict_input):
     out = ''
@@ -138,11 +135,15 @@ class BaseAPI:
                            batch_size=1,
                            batch_num=10,
                            cache_dir="./temp"):
+        input_channel = 3
+        if hasattr(self, 'input_channel'):
+            input_channel = self.input_channel
         arrange_transforms(
             model_type=self.model_type,
             class_name=self.__class__.__name__,
             transforms=dataset.transforms,
-            mode='quant')
+            mode='quant',
+            input_channel=input_channel)
         dataset.num_samples = batch_size * batch_num
         try:
             from .slim.post_quantization import PaddleXPostTrainingQuantization
@@ -422,11 +423,15 @@ class BaseAPI:
             from visualdl import LogWriter
             vdl_logdir = osp.join(save_dir, 'vdl_log')
        # Append the arrange operation to the transforms
+        input_channel = 3
+        if hasattr(self, 'input_channel'):
+            input_channel = self.input_channel
         arrange_transforms(
             model_type=self.model_type,
             class_name=self.__class__.__name__,
             transforms=train_dataset.transforms,
-            mode='train')
+            mode='train',
+            input_channel=input_channel)
        # Build the train_data_loader
         self.build_train_data_loader(
             dataset=train_dataset, batch_size=train_batch_size)
@@ -547,7 +552,7 @@ class BaseAPI:
             time_train_one_epoch = time.time() - epoch_start_time
             epoch_start_time = time.time()
 
-            ## Every save_interval_epochs, evaluate on the validation set and save the model
+            # Every save_interval_epochs, evaluate on the validation set and save the model
             self.completed_epochs += 1
             eval_epoch_start_time = time.time()
             if (i + 1) % save_interval_epochs == 0 or i == num_epochs - 1:
@@ -581,7 +586,7 @@ class BaseAPI:
                                     continue
                             log_writer.add_scalar(
                                 "Metrics/Eval(Epoch): {}".format(k), v, i + 1)
-                #self.save_model(save_dir=current_save_dir)
+                self.save_model(save_dir=current_save_dir)
                 if getattr(self, 'use_ema', False):
                     self.exe.run(self.ema.restore_program)
                 time_eval_one_epoch = time.time() - eval_epoch_start_time
@@ -594,4 +599,3 @@ class BaseAPI:
                 if eval_dataset is not None and early_stop:
                     if earlystop(current_accuracy):
                         break
-            #return

+ 40 - 9
paddlex/cv/models/faster_rcnn.py

@@ -59,7 +59,12 @@ class FasterRCNN(BaseAPI):
                  softnms_sigma=0.5,
                  post_threshold=0.05,
                  bbox_assigner='BBoxAssigner',
-                 fpn_num_channels=256):
+                 fpn_num_channels=256,
+                 input_channel=3,
+                 rpn_batch_size_per_im=256,
+                 rpn_fg_fraction=0.5,
+                 test_pre_nms_top_n=None,
+                 test_post_nms_top_n=1000):
         self.init_params = locals()
         super(FasterRCNN, self).__init__('detector')
         backbones = [
@@ -91,6 +96,11 @@ class FasterRCNN(BaseAPI):
         self.post_threshold = post_threshold
         self.bbox_assigner = bbox_assigner
         self.fpn_num_channels = fpn_num_channels
+        self.input_channel = input_channel
+        self.rpn_batch_size_per_im = rpn_batch_size_per_im
+        self.rpn_fg_fraction = rpn_fg_fraction
+        self.test_pre_nms_top_n = test_pre_nms_top_n
+        self.test_post_nms_top_n = test_post_nms_top_n
 
     def _get_backbone(self, backbone_name):
         norm_type = None
@@ -151,6 +161,8 @@ class FasterRCNN(BaseAPI):
     def build_net(self, mode='train'):
         train_pre_nms_top_n = 2000 if self.with_fpn else 12000
         test_pre_nms_top_n = 1000 if self.with_fpn else 6000
+        if self.test_pre_nms_top_n is not None:
+            test_pre_nms_top_n = self.test_pre_nms_top_n
         model = paddlex.cv.nets.detection.FasterRCNN(
             backbone=self._get_backbone(self.backbone),
             mode=mode,
@@ -172,7 +184,11 @@ class FasterRCNN(BaseAPI):
             post_threshold=self.post_threshold,
             softnms_sigma=self.softnms_sigma,
             bbox_assigner=self.bbox_assigner,
-            fpn_num_channels=self.fpn_num_channels)
+            fpn_num_channels=self.fpn_num_channels,
+            input_channel=self.input_channel,
+            rpn_batch_size_per_im=self.rpn_batch_size_per_im,
+            rpn_fg_fraction=self.rpn_fg_fraction,
+            test_post_nms_top_n=self.test_post_nms_top_n)
         inputs = model.generate_inputs()
         if mode == 'train':
             model_out = model.build_net(inputs)
@@ -214,7 +230,6 @@ class FasterRCNN(BaseAPI):
             end_lr=learning_rate)
         optimizer = fluid.optimizer.Momentum(
             learning_rate=lr_warmup,
-            #learning_rate=lr_decay,
             momentum=0.9,
             regularization=fluid.regularizer.L2Decay(1e-04))
         return optimizer
@@ -238,7 +253,9 @@ class FasterRCNN(BaseAPI):
               use_vdl=False,
               early_stop=False,
               early_stop_patience=5,
-              resume_checkpoint=None):
+              resume_checkpoint=None,
+              sensitivities_file=None,
+              eval_metric_loss=0.05):
         """训练。
 
         Args:
@@ -266,6 +283,9 @@ class FasterRCNN(BaseAPI):
             early_stop_patience (int): When early stopping is enabled, training is terminated if the validation
                 accuracy drops or stays flat for `early_stop_patience` consecutive epochs. Defaults to 5.
             resume_checkpoint (str): Path of a previously saved model from which to resume training. If None,
                 training is not resumed. Defaults to None.
+            sensitivities_file (str): If set to a path, the sensitivity information found there is used for pruning;
+                if set to the string 'DEFAULT', sensitivity information computed on ImageNet images is downloaded
+                automatically and used for pruning; if None, no pruning is performed. Defaults to None.
+            eval_metric_loss (float): Tolerable loss of accuracy. Defaults to 0.05.
 
         Raises:
             ValueError: The evaluation metric type is not among the supported ones.
@@ -307,7 +327,9 @@ class FasterRCNN(BaseAPI):
             pretrain_weights=pretrain_weights,
             fuse_bn=fuse_bn,
             save_dir=save_dir,
-            resume_checkpoint=resume_checkpoint)
+            resume_checkpoint=resume_checkpoint,
+            sensitivities_file=sensitivities_file,
+            eval_metric_loss=eval_metric_loss)
 
        # Train
         self.train_loop(
@@ -343,14 +365,19 @@ class FasterRCNN(BaseAPI):
             tuple (metrics, eval_details) / dict (metrics): When return_details is True, returns (metrics, eval_details);
                 when return_details is False, returns metrics. metrics is a dict with the key 'bbox_mmap' or 'bbox_map',
                 i.e. the mAP averaged over several IoU thresholds (mmAP) or the mean average precision (mAP), respectively.
-                eval_details is a dict with the key 'bbox', a list of predictions, each consisting of the image id,
-                the predicted box's class id, its coordinates, and its score; and the key 'gt': ground-truth box information.
+                eval_details is a dict with the two keys bbox and gt. The value of bbox is a list in which each element
+                is one prediction, itself a list of the image id, the predicted box's class id, its coordinates, and
+                its score. The value of gt holds information on the ground-truth boxes.
         """
+
+        input_channel = 3
+        if hasattr(self, 'input_channel'):
+            input_channel = self.input_channel
         arrange_transforms(
             model_type=self.model_type,
             class_name=self.__class__.__name__,
             transforms=eval_dataset.transforms,
-            mode='eval')
+            mode='eval',
+            input_channel=input_channel)
         if metric is None:
             if hasattr(self, 'metric') and self.metric is not None:
                 metric = self.metric
@@ -433,11 +460,15 @@ class FasterRCNN(BaseAPI):
                     model_type,
                     class_name,
-                    thread_pool=None):
+                    thread_pool=None,
+                    input_channel=3):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
             transforms=transforms,
-            mode='test')
+            mode='test',
+            input_channel=input_channel)
         if thread_pool is not None:
             batch_data = thread_pool.map(transforms, images)
         else:

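A sketch of how the new FasterRCNN knobs fit together, assuming train_dataset and eval_dataset are prepared VOCDetection instances; the concrete values are illustrative, and the sensitivity file is assumed to have been computed beforehand (see the prune_config.py sketch below):

import paddlex as pdx

model = pdx.det.FasterRCNN(
    num_classes=len(train_dataset.labels) + 1,  # plus background
    backbone='ResNet50',
    input_channel=1,           # e.g. grayscale industrial images
    rpn_batch_size_per_im=256,
    rpn_fg_fraction=0.5,
    test_pre_nms_top_n=None,   # keep the with_fpn-dependent default
    test_post_nms_top_n=1000)
model.train(
    num_epochs=12,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    sensitivities_file='rcnn.sensitivities',
    eval_metric_loss=0.05,
    save_dir='output/faster_rcnn')
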
+ 5 - 3
paddlex/cv/models/mask_rcnn.py

@@ -251,9 +251,11 @@ class MaskRCNN(FasterRCNN):
                 when return_details is False, returns metrics. metrics is a dict with the keys 'bbox_mmap' and 'segm_mmap',
                 or 'bbox_map' and 'segm_map', i.e. the mAP of the predicted boxes and of the segmented regions averaged
                 over several IoU thresholds (mmAP), or their mean average precision (mAP). eval_details is a dict,
-                with the key 'bbox', a list of box predictions, each consisting of the image id, the predicted box's
-                class id, its coordinates, and its score; the key 'mask', a list of per-box segmentation results; and
-                the key 'gt': information on the ground-truth boxes and regions.
+                with the three keys bbox, mask and gt. The value of bbox is a list in which each element is one
+                prediction, itself a list of the image id, the predicted box's class id, its coordinates, and its score.
+                The value of mask is a list in which each element is the segmentation result for one predicted box,
+                consisting of the image id, the box's class id, a binary map marking which pixels inside the box belong
+                to the object, and the box score. The value of gt holds information on the ground-truth annotations.
         """
         arrange_transforms(
             model_type=self.model_type,

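The restated return contract can be consumed directly; a hedged sketch, with the field order taken from the docstring above and model/eval_dataset reused from the earlier sketch:

metrics, eval_details = model.evaluate(eval_dataset, return_details=True)
print(metrics)  # e.g. {'bbox_mmap': ..., 'segm_mmap': ...}
for image_id, class_id, bbox, score in eval_details['bbox'][:5]:
    print(image_id, class_id, bbox, score)
for image_id, class_id, mask, score in eval_details.get('mask', [])[:5]:
    print(image_id, class_id, score)  # mask marks the in-box pixels belonging to the object
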
+ 12 - 7
paddlex/cv/models/ppyolo.py

@@ -125,7 +125,8 @@ class PPYOLO(BaseAPI):
         self.with_dcn_v2 = with_dcn_v2
 
         if paddle.__version__ < '1.8.4' and paddle.__version__ != '0.0.0':
-            raise Exception("PPYOLO requires paddlepaddle or paddlepaddle-gpu >= 1.8.4")
+            raise Exception(
+                "PPYOLO requires paddlepaddle or paddlepaddle-gpu >= 1.8.4")
 
     def _get_backbone(self, backbone_name):
         if backbone_name.startswith('ResNet50_vd'):
@@ -383,8 +384,8 @@ class PPYOLO(BaseAPI):
             tuple (metrics, eval_details) | dict (metrics): When return_details is True, returns (metrics, eval_details);
                 when return_details is False, returns metrics. metrics is a dict with the key 'bbox_mmap' or 'bbox_map',
                 i.e. the mAP averaged over several IoU thresholds (mmAP) or the mean average precision (mAP), respectively.
-                eval_details is a dict with the key 'bbox', a list of predictions, each consisting of the image id,
-                the predicted box's class id, its coordinates, and its score; and the key 'gt': ground-truth box information.
+                eval_details is a dict with the two keys bbox and gt. The value of bbox is a list in which each element
+                is one prediction, itself a list of the image id, the predicted box's class id, its coordinates, and
+                its score. The value of gt holds information on the ground-truth boxes.
         """
         arrange_transforms(
             model_type=self.model_type,
@@ -451,7 +452,11 @@ class PPYOLO(BaseAPI):
         return evaluate_metrics
 
     @staticmethod
-    def _preprocess(images, transforms, model_type, class_name, thread_pool=None):
+    def _preprocess(images,
+                    transforms,
+                    model_type,
+                    class_name,
+                    thread_pool=None):
         arrange_transforms(
             model_type=model_type,
             class_name=class_name,
@@ -546,9 +551,9 @@ class PPYOLO(BaseAPI):
 
         if transforms is None:
             transforms = self.test_transforms
-        im, im_size = PPYOLO._preprocess(img_file_list, transforms,
-                                         self.model_type,
-                                         self.__class__.__name__, self.thread_pool)
+        im, im_size = PPYOLO._preprocess(
+            img_file_list, transforms, self.model_type,
+            self.__class__.__name__, self.thread_pool)
 
         with fluid.scope_guard(self.scope):
             result = self.exe.run(self.test_prog,

+ 9 - 0
paddlex/cv/models/slim/prune_config.py

@@ -334,6 +334,15 @@ def get_prune_params(model):
         for i in params_not_prune:
             if i in prune_names:
                 prune_names.remove(i)
+    elif 'RCNN' in model_type:
+        for block in program.blocks:
+            for param in block.all_parameters():
+                pd_var = model.scope.find_var(param.name)
+                pd_param = pd_var.get_tensor()
+                if len(np.array(pd_param).shape) == 4:
+                    if any(s in param.name
+                           for s in ('fpn', 'rpn', 'fc', 'cls', 'bbox')):
+                        continue
+                    prune_names.append(param.name)
     else:
         raise Exception('The {} is not implemented yet!'.format(model_type))
     return prune_names

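With this RCNN branch in place, sensitivity information for a trained detector can be computed and fed back into train() as sketched earlier. A hedged sketch using the PaddleX 1.x slim API as documented; the paths are illustrative, and the call signature should be treated as an assumption if your version differs:

import paddlex as pdx

model = pdx.load_model('output/faster_rcnn/best_model')
pdx.slim.cal_params_sensitivities(
    model, 'rcnn.sensitivities', eval_dataset, batch_size=8)
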
+ 190 - 0
paddlex/cv/models/utils/detection_eval.py

@@ -768,3 +768,193 @@ class DetectionMAP(object):
             accum_fp += 1 - int(pos)
             accum_fp_list.append(accum_fp)
         return accum_tp_list, accum_fp_list
+
+
+def makeplot(rs, ps, outDir, class_name, iou_type):
+    import matplotlib.pyplot as plt
+
+    cs = np.vstack([
+        np.ones((2, 3)), np.array([.31, .51, .74]), np.array([.75, .31, .30]),
+        np.array([.36, .90, .38]), np.array([.50, .39, .64]),
+        np.array([1, .6, 0])
+    ])
+    areaNames = ['allarea', 'small', 'medium', 'large']
+    types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
+    for i in range(len(areaNames)):
+        area_ps = ps[..., i, 0]
+        figure_title = iou_type + '-' + class_name + '-' + areaNames[i]
+        aps = [ps_.mean() for ps_ in area_ps]
+        ps_curve = [
+            ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps
+        ]
+        ps_curve.insert(0, np.zeros(ps_curve[0].shape))
+        fig = plt.figure()
+        ax = plt.subplot(111)
+        for k in range(len(types)):
+            ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)
+            ax.fill_between(
+                rs,
+                ps_curve[k],
+                ps_curve[k + 1],
+                color=cs[k],
+                label=str('[{:.3f}'.format(aps[k]) + ']' + types[k]))
+        plt.xlabel('recall')
+        plt.ylabel('precision')
+        plt.xlim(0, 1.)
+        plt.ylim(0, 1.)
+        plt.title(figure_title)
+        plt.legend()
+        fig.savefig(outDir + '/{}.png'.format(figure_title))
+        plt.close(fig)
+
+
+def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type):
+    from pycocotools.coco import COCO
+    from pycocotools.cocoeval import COCOeval
+
+    nm = cocoGt.loadCats(catId)[0]
+    logging.info('--------------analyzing {}-{}---------------'.format(
+        k + 1, nm['name']))
+    ps_ = {}
+    dt = copy.deepcopy(cocoDt)
+    imgIds = cocoGt.getImgIds()
+    dt_anns = dt.dataset['annotations']
+    select_dt_anns = []
+    for ann in dt_anns:
+        if ann['category_id'] == catId:
+            select_dt_anns.append(ann)
+    dt.dataset['annotations'] = select_dt_anns
+    dt.createIndex()
+    # compute precision but ignore superclass confusion
+    gt = copy.deepcopy(cocoGt)
+    child_catIds = gt.getCatIds(supNms=[nm['supercategory']])
+    for idx, ann in enumerate(gt.dataset['annotations']):
+        if (ann['category_id'] in child_catIds and
+                ann['category_id'] != catId):
+            gt.dataset['annotations'][idx]['ignore'] = 1
+            gt.dataset['annotations'][idx]['iscrowd'] = 1
+            gt.dataset['annotations'][idx]['category_id'] = catId
+    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+    cocoEval.params.imgIds = imgIds
+    cocoEval.params.maxDets = [100]
+    cocoEval.params.iouThrs = [.1]
+    cocoEval.params.useCats = 1
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]
+    ps_['ps_supercategory'] = ps_supercategory
+    # compute precision but ignore any class confusion
+    gt = copy.deepcopy(cocoGt)
+    for idx, ann in enumerate(gt.dataset['annotations']):
+        if ann['category_id'] != catId:
+            gt.dataset['annotations'][idx]['ignore'] = 1
+            gt.dataset['annotations'][idx]['iscrowd'] = 1
+            gt.dataset['annotations'][idx]['category_id'] = catId
+    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
+    cocoEval.params.imgIds = imgIds
+    cocoEval.params.maxDets = [100]
+    cocoEval.params.iouThrs = [.1]
+    cocoEval.params.useCats = 1
+    cocoEval.evaluate()
+    cocoEval.accumulate()
+    ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]
+    ps_['ps_allcategory'] = ps_allcategory
+    return k, ps_
+
+
+def coco_error_analysis(eval_details_file=None,
+                        gt=None,
+                        pred_bbox=None,
+                        pred_mask=None,
+                        save_dir='./output'):
+    """
+    Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/coco_error_analysis.py
+    """
+
+    from multiprocessing import Pool
+    from pycocotools.coco import COCO
+    from pycocotools.cocoeval import COCOeval
+
+    if eval_details_file is not None:
+        import json
+        with open(eval_details_file, 'r') as f:
+            eval_details = json.load(f)
+            pred_bbox = eval_details['bbox']
+            if 'mask' in eval_details:
+                pred_mask = eval_details['mask']
+            gt = eval_details['gt']
+    if gt is None or pred_bbox is None:
+        raise Exception(
+            "gt or pred_bbox is None; please provide a valid eval_details_file, "
+            "or pass gt together with pred_bbox (and optionally pred_mask).")
+    if pred_bbox is not None and len(pred_bbox) == 0:
+        raise Exception("There is no predicted bbox.")
+    if pred_mask is not None and len(pred_mask) == 0:
+        raise Exception("There is no predicted mask.")
+
+    def _analyze_results(cocoGt, cocoDt, res_type, out_dir):
+        directory = os.path.dirname(out_dir + '/')
+        if not os.path.exists(directory):
+            logging.info('-------------create {}-----------------'.format(
+                out_dir))
+            os.makedirs(directory)
+
+        imgIds = cocoGt.getImgIds()
+        res_out_dir = out_dir + '/' + res_type + '/'
+        res_directory = os.path.dirname(res_out_dir)
+        if not os.path.exists(res_directory):
+            logging.info('-------------create {}-----------------'.format(
+                res_out_dir))
+            os.makedirs(res_directory)
+        iou_type = res_type
+        cocoEval = COCOeval(
+            copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
+        cocoEval.params.imgIds = imgIds
+        cocoEval.params.iouThrs = [.75, .5, .1]
+        cocoEval.params.maxDets = [100]
+        cocoEval.evaluate()
+        cocoEval.accumulate()
+        ps = cocoEval.eval['precision']
+        ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
+        catIds = cocoGt.getCatIds()
+        recThrs = cocoEval.params.recThrs
+        with Pool(processes=48) as pool:
+            args = [(k, cocoDt, cocoGt, catId, iou_type)
+                    for k, catId in enumerate(catIds)]
+            analyze_results = pool.starmap(analyze_individual_category, args)
+        for k, catId in enumerate(catIds):
+            nm = cocoGt.loadCats(catId)[0]
+            logging.info('--------------saving {}-{}---------------'.format(
+                k + 1, nm['name']))
+            analyze_result = analyze_results[k]
+            assert k == analyze_result[0], ""
+            ps_supercategory = analyze_result[1]['ps_supercategory']
+            ps_allcategory = analyze_result[1]['ps_allcategory']
+            # compute precision but ignore superclass confusion
+            ps[3, :, k, :, :] = ps_supercategory
+            # compute precision but ignore any class confusion
+            ps[4, :, k, :, :] = ps_allcategory
+            # fill in background and false negative errors and plot
+            T, _, _, A, _ = ps.shape
+            for t in range(T):
+                for a in range(A):
+                    if np.sum(ps[t, :, k, a, :] ==
+                              -1) != len(ps[t, :, k, :, :]):
+                        ps[t, :, k, a, :][ps[t, :, k, a, :] == -1] = 0
+            ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
+            ps[6, :, k, :, :] = 1.0
+            makeplot(recThrs, ps[:, :, k], res_out_dir, nm['name'], iou_type)
+        makeplot(recThrs, ps, res_out_dir, 'allclass', iou_type)
+
+    coco_gt = COCO()
+    coco_gt.dataset = gt
+    coco_gt.createIndex()
+    if pred_bbox is not None:
+        coco_dt = loadRes(coco_gt, pred_bbox)
+        _analyze_results(coco_gt, coco_dt, res_type='bbox', out_dir=save_dir)
+    if pred_mask is not None:
+        coco_dt = loadRes(coco_gt, pred_mask)
+        _analyze_results(coco_gt, coco_dt, res_type='segm', out_dir=save_dir)
+    logging.info("The analysis figures are saved in {}".format(save_dir))

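A usage sketch for the new analysis entry point (also exported as paddlex.det.coco_error_analysis in det.py further down); eval_details.json is an assumed file holding a saved copy of the details returned by evaluate(..., return_details=True):

import paddlex as pdx

# from a saved eval-details json ...
pdx.det.coco_error_analysis(
    eval_details_file='output/eval_details.json',
    save_dir='output/error_analysis')

# ... or directly from in-memory evaluation results
metrics, eval_details = model.evaluate(eval_dataset, return_details=True)
pdx.det.coco_error_analysis(
    gt=eval_details['gt'],
    pred_bbox=eval_details['bbox'],
    save_dir='output/error_analysis')
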
+ 9 - 5
paddlex/cv/nets/detection/faster_rcnn.py

@@ -46,6 +46,7 @@ class FasterRCNN(object):
     def __init__(
             self,
             backbone,
+            input_channel=3,
             mode='train',
             num_classes=81,
             with_fpn=False,
@@ -103,7 +104,8 @@ class FasterRCNN(object):
             else:
                 fpn = FPN()
         self.fpn = fpn
-        self.fpn.num_chan = fpn_num_channels
+        if self.fpn is not None:
+            self.fpn.num_chan = fpn_num_channels
         self.num_classes = num_classes
         if rpn_head is None:
             if self.fpn is None:
@@ -123,7 +125,6 @@ class FasterRCNN(object):
                     rpn_cls_loss=rpn_cls_loss,
                     rpn_focal_loss_alpha=rpn_focal_loss_alpha,
                     rpn_focal_loss_gamma=rpn_focal_loss_gamma)
-                #use_random=False)
             else:
                 rpn_head = FPNRPNHead(
                     anchor_start_size=anchor_sizes[0],
@@ -144,7 +145,6 @@ class FasterRCNN(object):
                     rpn_cls_loss=rpn_cls_loss,
                     rpn_focal_loss_alpha=rpn_focal_loss_alpha,
                     rpn_focal_loss_gamma=rpn_focal_loss_gamma)
-                #use_random=False)
         self.rpn_head = rpn_head
         if roi_extractor is None:
             if self.fpn is None:
@@ -206,6 +206,7 @@ class FasterRCNN(object):
                 bbox_reg_weights=bbox_reg_weights,
                 num_classes=num_classes,
                 shuffle_before_sample=self.rpn_head.use_random)
+        self.input_channel = input_channel
 
     def build_net(self, inputs):
         im = inputs['image']
@@ -272,13 +273,16 @@ class FasterRCNN(object):
 
         if self.fixed_input_shape is not None:
             input_shape = [
-                None, 3, self.fixed_input_shape[1], self.fixed_input_shape[0]
+                None, self.input_channel, self.fixed_input_shape[1],
+                self.fixed_input_shape[0]
             ]
             inputs['image'] = fluid.data(
                 dtype='float32', shape=input_shape, name='image')
         else:
             inputs['image'] = fluid.data(
-                dtype='float32', shape=[None, 3, None, None], name='image')
+                dtype='float32',
+                shape=[None, self.input_channel, None, None],
+                name='image')
         if self.mode == 'train':
             inputs['im_info'] = fluid.data(
                 dtype='float32', shape=[None, 3], name='im_info')

+ 9 - 36
paddlex/cv/nets/detection/rpn_head.py

@@ -267,8 +267,7 @@ class RPNHead(object):
         """
         rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
         if self.num_classes == 1:
-            if self.rpn_cls_loss == 'SigmoidCrossEntropy':
-                score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
+            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                     fluid.layers.rpn_target_assign(
                         bbox_pred=rpn_bbox,
                         cls_logits=rpn_cls,
@@ -283,44 +282,18 @@ class RPNHead(object):
                         rpn_positive_overlap=self.rpn_positive_overlap,
                         rpn_negative_overlap=self.rpn_negative_overlap,
                         use_random=self.use_random)
-                score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
-                score_tgt.stop_gradient = True
+            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
+            score_tgt.stop_gradient = True
+            if self.rpn_cls_loss == 'SigmoidCrossEntropy':
                 rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                     x=score_pred, label=score_tgt)
             elif self.rpn_cls_loss == 'SigmoidFocalLoss':
-                binary_gt_label = fluid.layers.full_like(
-                    gt_box, fill_value=1, dtype='int32')
-                binary_gt_label = fluid.layers.reduce_sum(
-                    binary_gt_label, dim=1, keep_dim=True)
                 data = fluid.layers.fill_constant(
-                    shape=[1], value=4, dtype='int32')
-                binary_gt_label = fluid.layers.greater_equal(binary_gt_label,
-                                                             data)
-                binary_gt_label = fluid.layers.cast(
-                    binary_gt_label, dtype='int32')
-                score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight, fg_num = \
-                    fluid.layers.retinanet_target_assign(
-                        bbox_pred=rpn_bbox,
-                        cls_logits=rpn_cls,
-                        anchor_box=anchor,
-                        anchor_var=anchor_var,
-                        gt_boxes=gt_box,
-                        gt_labels=binary_gt_label,
-                        is_crowd=is_crowd,
-                        im_info=im_info,
-                        positive_overlap=self.rpn_positive_overlap,
-                        negative_overlap=self.rpn_negative_overlap,
-                        num_classes=1)
-                fg_num = fluid.layers.reduce_sum(fg_num, name='fg_num')
-                #score_tgt = fluid.layers.cast(score_tgt, 'int32')
-                #rpn_cls_loss = fluid.layers.sigmoid_focal_loss(
-                #    x=score_pred,
-                #    label=score_tgt,
-                #    fg_num=fg_num,
-                #    gamma=self.rpn_focal_loss_gamma,
-                #    alpha=self.rpn_focal_loss_alpha)
-                score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
-                score_tgt.stop_gradient = True
+                    shape=[1], value=1, dtype='int32')
+                fg_label = fluid.layers.greater_equal(score_tgt, data)
+                fg_label = fluid.layers.cast(fg_label, dtype='int32')
+                fg_num = fluid.layers.reduce_sum(fg_label)
+                fg_num.stop_gradient = True
                 loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                     x=score_pred, label=score_tgt)
 

+ 6 - 1
paddlex/cv/transforms/__init__.py

@@ -84,7 +84,12 @@ def build_transforms_v1(model_type, transforms_info, batch_transforms_info):
     return eval_transforms
 
 
-def arrange_transforms(model_type, class_name, transforms, mode='train'):
+def arrange_transforms(model_type,
+                       class_name,
+                       transforms,
+                       mode='train',
+                       input_channel=3):
+    transforms.input_channel = input_channel
    # Append the arrange operation to transforms
     if model_type == 'classifier':
         arrange_transform = cls_transforms.ArrangeClassifier

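A minimal sketch of how the new input_channel flag reaches the data pipeline (the 1-channel value and the eval_transforms instance are assumptions): arrange_transforms stamps the value onto the Compose object, and Compose.__call__ later reads it back via hasattr, as the det_transforms.py hunks below show.

from paddlex.cv.transforms import arrange_transforms

arrange_transforms(
    model_type='detector',
    class_name='FasterRCNN',
    transforms=eval_transforms,  # a det_transforms.Compose instance
    mode='eval',
    input_channel=1)
assert eval_transforms.input_channel == 1
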
+ 67 - 5
paddlex/cv/transforms/det_transforms.py

@@ -98,7 +98,7 @@ class Compose(DetTransform):
                 The fields are determined by the last preprocessing operator in transforms.
         """
 
-        def decode_image(im_file, im_info, label_info):
+        def decode_image(im_file, im_info, label_info, input_channel=3):
             if im_info is None:
                 im_info = dict()
             if isinstance(im_file, np.ndarray):
@@ -109,12 +109,19 @@ class Compose(DetTransform):
                 im = im_file
             else:
                 try:
-                    im = cv2.imread(im_file).astype('float32')
+                    if input_channel == 3:
+                        im = cv2.imread(im_file).astype('float32')
+                    else:
+                        im = cv2.imread(im_file,
+                                        cv2.IMREAD_UNCHANGED).astype('float32')
+                        if im.ndim < 3:
+                            im = np.expand_dims(im, axis=-1)
                 except:
                     raise TypeError('Cannot read the image file {}!'.format(
                         im_file))
             im = im.astype('float32')
-            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+            if input_channel == 3:
+                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
             # make default im_info with [h, w, 1]
             im_info['im_resize_info'] = np.array(
                 [im.shape[0], im.shape[1], 1.], dtype=np.float32)
@@ -134,7 +141,10 @@ class Compose(DetTransform):
             else:
                 return (im, im_info, label_info)
 
-        outputs = decode_image(im, im_info, label_info)
+        input_channel = 3
+        if hasattr(self, 'input_channel'):
+            input_channel = self.input_channel
+        outputs = decode_image(im, im_info, label_info, input_channel)
         im = outputs[0]
         im_info = outputs[1]
         if len(outputs) == 3:
@@ -146,6 +156,10 @@ class Compose(DetTransform):
                 outputs = op(im, im_info, label_info)
                 im = outputs[0]
             else:
+                if im.shape[-1] != 3:
+                    raise Exception(
+                        "Only 3-channel RGB images are supported by imgaug operators, but the received image has {} channel(s).".
+                        format(im.shape[-1]))
                 im = execute_imgaug(op, im)
                 if label_info is not None:
                     outputs = (im, im_info, label_info)
@@ -236,6 +250,8 @@ class ResizeByShort(DetTransform):
         im = cv2.resize(
             im, (resized_width, resized_height),
             interpolation=cv2.INTER_LINEAR)
+        if im.ndim < 3:
+            im = np.expand_dims(im, axis=-1)
         im_info['im_resize_info'] = np.array(im_resize_info).astype(np.float32)
         if label_info is None:
             return (im, im_info)
@@ -533,7 +549,9 @@ class Normalize(DetTransform):
         """
         mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
         std = np.array(self.std)[np.newaxis, np.newaxis, :]
-        im = normalize(im, mean, std)
+        min_val = [0] * im.shape[-1]
+        max_val = [255] * im.shape[-1]
+        im = normalize(im, mean, std, min_val, max_val)
         if label_info is None:
             return (im, im_info)
         else:
@@ -587,6 +605,11 @@ class RandomDistort(DetTransform):
                    when label_info is not None, returns (im, im_info, label_info), i.e. the image np.ndarray,
                    the dict of image information, and the dict of annotation-box information.
         """
+        if im.shape[-1] != 3:
+            raise Exception(
+                "Only 3-channel RGB images are supported by the RandomDistort operator, but the received image has {} channel(s).".
+                format(im.shape[-1]))
+
         brightness_lower = 1 - self.brightness_range
         brightness_upper = 1 + self.brightness_range
         contrast_lower = 1 - self.contrast_range
@@ -1020,6 +1043,45 @@ class RandomCrop(DetTransform):
         return (im, im_info, label_info)
 
 
+class CLAHE(DetTransform):
+    """Apply contrast limited adaptive histogram equalization (CLAHE) to an image.
+    Args:
+        clip_limit (int|float): Threshold for contrast limiting. Defaults to 2.
+        tile_grid_size (list|tuple): Grid size used for the tile-wise equalization. Defaults to (8, 8).
+    Raises:
+        TypeError: An argument has an unsupported type.
+    """
+
+    def __init__(self, clip_limit=2., tile_grid_size=(8, 8)):
+        self.clip_limit = clip_limit
+        self.tile_grid_size = tile_grid_size
+
+    def __call__(self, im, im_info=None, label_info=None):
+        """
+        Args:
+            im (np.ndarray): Image data as an np.ndarray.
+            im_info (dict, optional): Stores information related to the image.
+            label_info (dict, optional): Stores information related to the annotation boxes.
+
+        Returns:
+            tuple: When label_info is None, returns (im, im_info), i.e. the image np.ndarray and the dict of
+                   information about the image; when label_info is not None, returns (im, im_info, label_info),
+                   which additionally carries the dict of annotation-box information.
+        """
+        if im.shape[-1] != 1:
+            raise Exception(
+                "Only single-channel images are supported by the CLAHE operator, but the received image has {} channel(s).".
+                format(im.shape[-1]))
+        clahe = cv2.createCLAHE(
+            clipLimit=self.clip_limit, tileGridSize=self.tile_grid_size)
+        im = clahe.apply(im).astype(im.dtype)
+
+        if label_info is None:
+            return (im, im_info)
+        else:
+            return (im, im_info, label_info)
+
+
 class ArrangeFasterRCNN(DetTransform):
     """获取FasterRCNN模型训练/验证/预测所需信息。
 

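A self-contained sketch of the new CLAHE op on a single-channel uint8 array; the random input is a stand-in for a real grayscale image, which inside a pipeline would arrive via Compose with input_channel=1:

import numpy as np
from paddlex.cv.transforms.det_transforms import CLAHE

im = np.random.randint(0, 256, (512, 512, 1), dtype=np.uint8)
op = CLAHE(clip_limit=2., tile_grid_size=(8, 8))
im_out, im_info = op(im, im_info={})
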
+ 4 - 0
paddlex/cv/transforms/ops.py

@@ -45,6 +45,8 @@ def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR):
 
     im = cv2.resize(
         im, (resized_width, resized_height), interpolation=interpolation)
+    if im.ndim < 3:
+        im = np.expand_dims(im, axis=-1)
     return im
 
 
@@ -56,6 +58,8 @@ def resize(im, target_size=608, interp=cv2.INTER_LINEAR):
         w = target_size
         h = target_size
     im = cv2.resize(im, (w, h), interpolation=interp)
+    if im.ndim < 3:
+        im = np.expand_dims(im, axis=-1)
     return im
 
 

+ 8 - 0
paddlex/cv/transforms/seg_transforms.py

@@ -341,6 +341,8 @@ class Resize(SegTransform):
             fx=im_scale_x,
             fy=im_scale_y,
             interpolation=self.interp_dict[self.interp])
+        if im.ndim < 3:
+            im = np.expand_dims(im, axis=-1)
         if label is not None:
             label = cv2.resize(
                 label,
@@ -463,6 +465,8 @@ class ResizeByShort(SegTransform):
         im = cv2.resize(
             im, (resized_width, resized_height),
             interpolation=cv2.INTER_NEAREST)
+        if im.ndim < 3:
+            im = np.expand_dims(im, axis=-1)
         if label is not None:
             label = cv2.resize(
                 label, (resized_width, resized_height),
@@ -584,6 +588,8 @@ class ResizeStepScaling(SegTransform):
             fx=scale_factor,
             fy=scale_factor,
             interpolation=cv2.INTER_LINEAR)
+        if im.ndim < 3:
+            im = np.expand_dims(im, axis=-1)
         if label is not None:
             label = cv2.resize(
                 label, (0, 0),
@@ -1030,6 +1036,8 @@ class RandomScaleAspect(SegTransform):
                     im = cv2.resize(
                         im, (img_width, img_height),
                         interpolation=cv2.INTER_LINEAR)
+                    if im.ndim < 3:
+                        im = np.expand_dims(im, axis=-1)
                     label = cv2.resize(
                         label, (img_width, img_height),
                         interpolation=cv2.INTER_NEAREST)

+ 1 - 0
paddlex/det.py

@@ -22,3 +22,4 @@ MaskRCNN = cv.models.MaskRCNN
 transforms = cv.transforms.det_transforms
 visualize = cv.models.utils.visualize.visualize_detection
 draw_pr_curve = cv.models.utils.visualize.draw_pr_curve
+coco_error_analysis = cv.models.utils.detection_eval.coco_error_analysis

+ 0 - 4
paddlex/tools/convert.py

@@ -19,7 +19,6 @@ from .x2imagenet import JingLing2ImageNet
 from .x2coco import LabelMe2COCO
 from .x2coco import EasyData2COCO
 from .x2coco import JingLing2COCO
-from .x2coco import VOC2COCO
 from .x2voc import LabelMe2VOC
 from .x2voc import EasyData2VOC
 from .x2seg import JingLing2Seg
@@ -31,7 +30,6 @@ jingling2imagenet = JingLing2ImageNet().convert
 labelme2coco = LabelMe2COCO().convert
 easydata2coco = EasyData2COCO().convert
 jingling2coco = JingLing2COCO().convert
-voc2coco = VOC2COCO().convert
 labelme2voc = LabelMe2VOC().convert
 easydata2voc = EasyData2VOC().convert
 jingling2seg = JingLing2Seg().convert
@@ -50,8 +48,6 @@ def dataset_conversion(source, to, pics, anns, save_dir):
         jingling2imagenet(pics, anns, save_dir)
     elif source == 'jingling' and to == 'MSCOCO':
         jingling2coco(pics, anns, save_dir)
-    elif source == 'PascalVOC' and to == 'MSCOCO':
-        voc2coco(pics, anns, save_dir)
     elif source == 'jingling' and to == 'SEG':
         jingling2seg(pics, anns, save_dir)
     elif source == 'easydata' and to == 'ImageNet':

+ 0 - 206
paddlex/tools/x2coco.py

@@ -19,13 +19,10 @@ import json
 import os
 import os.path as osp
 import shutil
-import re
 import numpy as np
 import PIL.ImageDraw
-import xml.etree.ElementTree as ET
 from .base import MyEncoder, is_pic, get_encoding
 from paddlex.utils import path_normalization
-import paddlex.utils.logging as logging
 
 
 class X2COCO(object):
@@ -380,206 +377,3 @@ class JingLing2COCO(X2COCO):
                             self.generate_rectangle_anns_field(
                                 points, label, image_id, object_id,
                                 label_to_num))
-
-
-class VOC2COCO(X2COCO):
-    """Convert a dataset with VOC-style annotations into a COCO dataset.
-    """
-
-    def __init__(self):
-        super(VOC2COCO, self).__init__()
-
-    def generate_categories_field(self, label, labels_list):
-        category = {}
-        category["supercategory"] = "component"
-        category["id"] = len(labels_list) + 1
-        category["name"] = label
-        return category
-
-    def generate_images_field(self, xml_info, image_file, image_id):
-        image = {}
-        image["height"] = xml_info["imageHeight"]
-        image["width"] = xml_info["imageWidth"]
-        image["id"] = image_id + 1
-        image["imagePath"] = image_file
-        image["file_name"] = osp.split(image_file)[-1]
-        return image
-
-    def generate_label_list(self, xml_dir):
-        xml_dir_dir = os.path.abspath(
-            os.path.join(os.path.dirname(xml_dir), os.path.pardir))
-        self.labels_list = []
-        self.label_to_num = {}
-        if osp.exists(osp.join(xml_dir_dir, 'labels.txt')):
-            with open(osp.join(xml_dir_dir, 'labels.txt'), 'r') as fr:
-                while True:
-                    label = fr.readline().strip()
-                    if not label:
-                        break
-                    if label not in self.labels_list:
-                        self.categories_list.append(\
-                            self.generate_categories_field(label, self.labels_list))
-                        self.labels_list.append(label)
-                        self.label_to_num[label] = len(self.labels_list)
-            return
-        logging.info(
-            'labels.txt is not in the folder {}, so categories are ordered randomly in annotation.json.'.
-            format(xml_dir_dir))
-        return
-
-    def parse_xml(self, xml_file):
-        xml_info = {'im_info': {}, 'annotations': []}
-        tree = ET.parse(xml_file)
-        pattern = re.compile('<object>', re.IGNORECASE)
-        obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
-        obj_tag = obj_match[0][1:-1]
-        objs = tree.findall(obj_tag)
-        pattern = re.compile('<size>', re.IGNORECASE)
-        size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))[0][1:
-                                                                            -1]
-        size_element = tree.find(size_tag)
-        pattern = re.compile('<width>', re.IGNORECASE)
-        width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                           -1]
-        im_w = float(size_element.find(width_tag).text)
-        pattern = re.compile('<height>', re.IGNORECASE)
-        height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                            -1]
-        im_h = float(size_element.find(height_tag).text)
-        xml_info['im_info']['imageWidth'] = im_w
-        xml_info['im_info']['imageHeight'] = im_h
-        for i, obj in enumerate(objs):
-            pattern = re.compile('<name>', re.IGNORECASE)
-            name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            cname = obj.find(name_tag).text.strip()
-            pattern = re.compile('<bndbox>', re.IGNORECASE)
-            box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            box_element = obj.find(box_tag)
-            pattern = re.compile('<xmin>', re.IGNORECASE)
-            xmin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x1 = float(box_element.find(xmin_tag).text)
-            pattern = re.compile('<ymin>', re.IGNORECASE)
-            ymin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y1 = float(box_element.find(ymin_tag).text)
-            pattern = re.compile('<xmax>', re.IGNORECASE)
-            xmax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x2 = float(box_element.find(xmax_tag).text)
-            pattern = re.compile('<ymax>', re.IGNORECASE)
-            ymax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y2 = float(box_element.find(ymax_tag).text)
-            x1 = max(0, x1)
-            y1 = max(0, y1)
-            if im_w > 0.5 and im_h > 0.5:
-                x2 = min(im_w - 1, x2)
-                y2 = min(im_h - 1, y2)
-            xml_info['annotations'].append({
-                'bbox': [[x1, y1], [x2, y2], [x1, y2], [x2, y1]],
-                'category': cname,
-            })
-        return xml_info
-
-    def parse_json(self, img_dir, xml_dir, file_list=None):
-        image_id = -1
-        object_id = -1
-        self.generate_label_list(xml_dir)
-        for img_file in os.listdir(img_dir):
-            if file_list is not None and img_file not in file_list:
-                continue
-            img_name_part = osp.splitext(img_file)[0]
-            xml_file = osp.join(xml_dir, img_name_part + ".xml")
-            if not osp.exists(xml_file):
-                os.remove(osp.join(img_dir, img_file))
-                continue
-            image_id = image_id + 1
-            xml_info = self.parse_xml(xml_file)
-            img_info = self.generate_images_field(xml_info['im_info'],
-                                                  osp.join(img_dir, img_file),
-                                                  image_id)
-            self.images_list.append(img_info)
-            annos = xml_info['annotations']
-            for anno in annos:
-                object_id = object_id + 1
-                label = anno["category"]
-                if label not in self.labels_list:
-                    self.categories_list.append(\
-                        self.generate_categories_field(label, self.labels_list))
-                    self.labels_list.append(label)
-                    self.label_to_num[label] = len(self.labels_list)
-                self.annotations_list.append(
-                    self.generate_rectangle_anns_field(anno[
-                        'bbox'], label, image_id, object_id,
-                                                       self.label_to_num))
-
-    def convert(self, image_dir, json_dir, dataset_save_dir):
-        """转换。
-        Args:
-            image_dir (str): 图像文件存放的路径。
-            json_dir (str): 与每张图像对应的json文件的存放路径。
-            dataset_save_dir (str): 转换后数据集存放路径。
-        """
-        assert osp.exists(image_dir), "The image folder does not exist!"
-        assert osp.exists(json_dir), "The json folder does not exist!"
-        assert osp.exists(dataset_save_dir), "The save folder does not exist!"
-        # Convert the image files.
-        new_image_dir = osp.join(dataset_save_dir, "JPEGImages")
-        if osp.exists(new_image_dir):
-            shutil.rmtree(new_image_dir)
-        os.makedirs(new_image_dir)
-        for img_name in os.listdir(image_dir):
-            if is_pic(img_name):
-                shutil.copyfile(
-                    osp.join(image_dir, img_name),
-                    osp.join(new_image_dir, img_name))
-        # Convert the json files.
-        xml_dir_dir = os.path.abspath(
-            os.path.join(os.path.dirname(json_dir), os.path.pardir))
-        for part in ['train', 'val', 'test']:
-            part_list_file = osp.join(xml_dir_dir, '{}_list.txt'.format(part))
-            if osp.exists(part_list_file):
-                file_list = list()
-                with open(part_list_file, 'r') as f:
-                    while True:
-                        line = f.readline()
-                        if not line:
-                            break
-                        if len(line.strip().split()) > 2:
-                            raise Exception(
-                                "A space is defined as the separator, but it exists in image or label name {}."
-                                .format(line))
-                        img_file = osp.join(
-                            image_dir, osp.split(line.strip().split()[0])[-1])
-                        xml_file = osp.join(
-                            json_dir, osp.split(line.strip().split()[1])[-1])
-                        img_file = path_normalization(img_file)
-                        xml_file = path_normalization(xml_file)
-                        if not is_pic(img_file):
-                            continue
-                        if not osp.isfile(xml_file):
-                            continue
-                        if not osp.exists(img_file):
-                            raise IOError('The image file {} does not exist!'.
-                                          format(img_file))
-                        file_list.append(osp.split(img_file)[-1])
-                self.parse_json(new_image_dir, json_dir, file_list)
-                coco_data = {}
-                coco_data["images"] = self.images_list
-                coco_data["categories"] = self.categories_list
-                coco_data["annotations"] = self.annotations_list
-                json_path = osp.join(dataset_save_dir, "{}.json".format(part))
-                json.dump(
-                    coco_data, open(json_path, "w"), indent=4, cls=MyEncoder)
-                logging.info("xml files in {} are converted to the MSCOCO format stored in {}".format(\
-                    osp.join(xml_dir_dir, '{}_list.txt'.format(part)), osp.join(dataset_save_dir, "{}.json".format(part))))
-                self.images_list = []
-                self.annotations_list = []
-        self.parse_json(new_image_dir, json_dir)
-        coco_data = {}
-        coco_data["images"] = self.images_list
-        coco_data["categories"] = self.categories_list
-        coco_data["annotations"] = self.annotations_list
-        json_path = osp.join(dataset_save_dir, "annotations.json")
-        json.dump(coco_data, open(json_path, "w"), indent=4, cls=MyEncoder)
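
The parse_xml method removed above deals with PASCAL VOC files whose tag capitalization varies in the wild (for example <bndbox> vs <Bndbox>): since ElementTree's find() is case-sensitive, the code recovers each tag's actual spelling by regex-matching the serialized element with re.IGNORECASE and only then calls find(). A minimal, self-contained sketch of that idiom; the find_ci helper and the sample XML are illustrative, not part of the codebase:

import re
import xml.etree.ElementTree as ET

def find_ci(element, tag):
    # Find a direct child whose tag matches `tag` case-insensitively.
    pattern = re.compile('<' + tag + '>', re.IGNORECASE)
    matches = pattern.findall(str(ET.tostringlist(element)))
    if not matches:
        return None
    # matches[0] looks like '<Size>'; strip the angle brackets before find().
    return element.find(matches[0][1:-1])

root = ET.fromstring('<annotation><Size><Width>640</Width></Size></annotation>')
size = find_ci(root, 'size')
print(find_ci(size, 'width').text)  # prints: 640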

+ 0 - 18
paddlex/tools/x2voc.py

@@ -22,19 +22,6 @@ import shutil
 import numpy as np
 from .base import MyEncoder, is_pic, get_encoding
 
-ch2en = {
-    u'不导电': 'bu_dao_dian',
-    u'擦花': 'ca_hua',
-    u'角位漏底': 'jiao_wei_lou_di',
-    u'桔皮': 'ju_pi',
-    u'漏底': 'lou_di',
-    u'喷流': 'pen_liu',
-    u'起坑': 'qi_keng',
-    u'漆泡': 'qi_pao',
-    u'杂色': 'za_se',
-    u'脏点': 'zang_dian'
-}
-
 
 class X2VOC(object):
     def __init__(self):
@@ -78,12 +65,10 @@ class LabelMe2VOC(X2VOC):
     def json2xml(self, image_dir, json_dir, xml_dir):
         import xml.dom.minidom as minidom
         i = 0
-        print('length: ', len(os.listdir(image_dir)))
         for img_name in os.listdir(image_dir):
             img_name_part = osp.splitext(img_name)[0]
             json_file = osp.join(json_dir, img_name_part + ".json")
             i += 1
-            print(i, " ", img_name)
             if not osp.exists(json_file):
                os.remove(osp.join(image_dir, img_name))
                 continue
@@ -96,7 +81,6 @@ class LabelMe2VOC(X2VOC):
             node_filename = xml_doc.createElement("filename")
             node_filename.appendChild(xml_doc.createTextNode(img_name))
             root.appendChild(node_filename)
-            print(i, " ", json_file)
             with open(json_file, mode="r", \
                               encoding=get_encoding(json_file)) as j:
                 json_info = json.load(j)
@@ -135,8 +119,6 @@ class LabelMe2VOC(X2VOC):
                         ymin = min(y)
                         ymax = max(y)
                     label = shape["label"]
-                    label = ch2en[label]
-                    #print(label)
                     node_obj = xml_doc.createElement("object")
                     node_name = xml_doc.createElement("name")
                     node_name.appendChild(xml_doc.createTextNode(label))

+ 0 - 177
tutorials/train/object_detection/coco_error_analysis.py

@@ -1,177 +0,0 @@
-import copy
-import os
-from argparse import ArgumentParser
-from multiprocessing import Pool
-
-import matplotlib.pyplot as plt
-import numpy as np
-from pycocotools.coco import COCO
-from pycocotools.cocoeval import COCOeval
-
-
-def makeplot(rs, ps, outDir, class_name, iou_type):
-    cs = np.vstack([
-        np.ones((2, 3)), np.array([.31, .51, .74]), np.array([.75, .31, .30]),
-        np.array([.36, .90, .38]), np.array([.50, .39, .64]),
-        np.array([1, .6, 0])
-    ])
-    areaNames = ['allarea', 'small', 'medium', 'large']
-    types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']
-    for i in range(len(areaNames)):
-        area_ps = ps[..., i, 0]
-        figure_title = iou_type + '-' + class_name + '-' + areaNames[i]
-        aps = [ps_.mean() for ps_ in area_ps]
-        ps_curve = [
-            ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps
-        ]
-        ps_curve.insert(0, np.zeros(ps_curve[0].shape))
-        fig = plt.figure()
-        ax = plt.subplot(111)
-        for k in range(len(types)):
-            ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)
-            ax.fill_between(
-                rs,
-                ps_curve[k],
-                ps_curve[k + 1],
-                color=cs[k],
-                label=str('[{:.3f}'.format(aps[k]) + ']' + types[k]))
-        plt.xlabel('recall')
-        plt.ylabel('precision')
-        plt.xlim(0, 1.)
-        plt.ylim(0, 1.)
-        plt.title(figure_title)
-        plt.legend()
-        # plt.show()
-        fig.savefig(outDir + '/{}.png'.format(figure_title))
-        plt.close(fig)
-
-
-def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type):
-    nm = cocoGt.loadCats(catId)[0]
-    print('--------------analyzing {}-{}---------------'.format(k + 1, nm[
-        'name']))
-    ps_ = {}
-    dt = copy.deepcopy(cocoDt)
-    nm = cocoGt.loadCats(catId)[0]
-    imgIds = cocoGt.getImgIds()
-    dt_anns = dt.dataset['annotations']
-    select_dt_anns = []
-    for ann in dt_anns:
-        if ann['category_id'] == catId:
-            select_dt_anns.append(ann)
-    dt.dataset['annotations'] = select_dt_anns
-    dt.createIndex()
-    # compute precision but ignore superclass confusion
-    gt = copy.deepcopy(cocoGt)
-    child_catIds = gt.getCatIds(supNms=[nm['supercategory']])
-    for idx, ann in enumerate(gt.dataset['annotations']):
-        if (ann['category_id'] in child_catIds and
-                ann['category_id'] != catId):
-            gt.dataset['annotations'][idx]['ignore'] = 1
-            gt.dataset['annotations'][idx]['iscrowd'] = 1
-            gt.dataset['annotations'][idx]['category_id'] = catId
-    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
-    cocoEval.params.imgIds = imgIds
-    cocoEval.params.maxDets = [100]
-    cocoEval.params.iouThrs = [.1]
-    cocoEval.params.useCats = 1
-    cocoEval.evaluate()
-    cocoEval.accumulate()
-    ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]
-    ps_['ps_supercategory'] = ps_supercategory
-    # compute precision but ignore any class confusion
-    gt = copy.deepcopy(cocoGt)
-    for idx, ann in enumerate(gt.dataset['annotations']):
-        if ann['category_id'] != catId:
-            gt.dataset['annotations'][idx]['ignore'] = 1
-            gt.dataset['annotations'][idx]['iscrowd'] = 1
-            gt.dataset['annotations'][idx]['category_id'] = catId
-    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)
-    cocoEval.params.imgIds = imgIds
-    cocoEval.params.maxDets = [100]
-    cocoEval.params.iouThrs = [.1]
-    cocoEval.params.useCats = 1
-    cocoEval.evaluate()
-    cocoEval.accumulate()
-    ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]
-    ps_['ps_allcategory'] = ps_allcategory
-    return k, ps_
-
-
-def analyze_results(res_file, ann_file, res_types, out_dir):
-    for res_type in res_types:
-        assert res_type in ['bbox', 'segm']
-
-    directory = os.path.dirname(out_dir + '/')
-    if not os.path.exists(directory):
-        print('-------------create {}-----------------'.format(out_dir))
-        os.makedirs(directory)
-
-    cocoGt = COCO(ann_file)
-    cocoDt = cocoGt.loadRes(res_file)
-    imgIds = cocoGt.getImgIds()
-    for res_type in res_types:
-        res_out_dir = out_dir + '/' + res_type + '/'
-        res_directory = os.path.dirname(res_out_dir)
-        if not os.path.exists(res_directory):
-            print('-------------create {}-----------------'.format(
-                res_out_dir))
-            os.makedirs(res_directory)
-        iou_type = res_type
-        cocoEval = COCOeval(
-            copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)
-        cocoEval.params.imgIds = imgIds
-        cocoEval.params.iouThrs = [.75, .5, .1]
-        cocoEval.params.maxDets = [100]
-        cocoEval.evaluate()
-        cocoEval.accumulate()
-        ps = cocoEval.eval['precision']
-        ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])
-        catIds = cocoGt.getCatIds()
-        recThrs = cocoEval.params.recThrs
-        with Pool(processes=48) as pool:
-            args = [(k, cocoDt, cocoGt, catId, iou_type)
-                    for k, catId in enumerate(catIds)]
-            analyze_results = pool.starmap(analyze_individual_category, args)
-        for k, catId in enumerate(catIds):
-            nm = cocoGt.loadCats(catId)[0]
-            print('--------------saving {}-{}---------------'.format(k + 1, nm[
-                'name']))
-            analyze_result = analyze_results[k]
-            assert k == analyze_result[0]
-            ps_supercategory = analyze_result[1]['ps_supercategory']
-            ps_allcategory = analyze_result[1]['ps_allcategory']
-            # compute precision but ignore superclass confusion
-            ps[3, :, k, :, :] = ps_supercategory
-            # compute precision but ignore any class confusion
-            ps[4, :, k, :, :] = ps_allcategory
-            # fill in background and false negative errors and plot
-            #ps[ps == -1] = 0
-            T, _, _, A, _ = ps.shape
-            for t in range(T):
-                for a in range(A):
-                    if np.sum(ps[t, :, k, a, :] ==
-                              -1) != len(ps[t, :, k, :, :]):
-                        ps[t, :, k, a, :][ps[t, :, k, a, :] == -1] = 0
-            ps[5, :, k, :, :] = (ps[4, :, k, :, :] > 0)
-            ps[6, :, k, :, :] = 1.0
-            makeplot(recThrs, ps[:, :, k], res_out_dir, nm['name'], iou_type)
-        makeplot(recThrs, ps, res_out_dir, 'allclass', iou_type)
-
-
-def main():
-    parser = ArgumentParser(description='COCO Error Analysis Tool')
-    parser.add_argument('result', help='result file (json format) path')
-    parser.add_argument('out_dir', help='dir to save analyze result images')
-    parser.add_argument(
-        '--ann',
-        default='data/coco/annotations/instances_val2017.json',
-        help='annotation file path')
-    parser.add_argument(
-        '--types', type=str, nargs='+', default=['bbox'], help='result types')
-    args = parser.parse_args()
-    analyze_results(args.result, args.ann, args.types, out_dir=args.out_dir)
-
-
-if __name__ == '__main__':
-    main()
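
For reference, the deleted script implements the standard COCO error-analysis decomposition: for each category it re-runs COCOeval at an IoU threshold of 0.1 twice, once with same-supercategory ground truth marked ignore (isolating supercategory confusion, the Sim band) and once with all other categories ignored (any-class confusion, the Oth band), then stacks these with the C75/C50/Loc curves and the background/false-negative fills to draw the seven-band precision-recall plots per class and area range. Going by its argparse definition, usage looked roughly like the following; the paths are illustrative:

# Command-line form:
#   python coco_error_analysis.py bbox_detections.json analysis_out \
#       --ann annotations/instances_val.json --types bbox
# Equivalent programmatic call, were the module still present:
from coco_error_analysis import analyze_results

analyze_results(
    res_file='bbox_detections.json',            # detections in COCO result format
    ann_file='annotations/instances_val.json',  # ground-truth annotations
    res_types=['bbox'],                         # 'bbox' and/or 'segm'
    out_dir='analysis_out')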

+ 0 - 87
tutorials/train/object_detection/guang_2.py

@@ -1,87 +0,0 @@
-# Environment variable configuration, used to control whether the GPU is used
-# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
-import json
-import numpy as np
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(), transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
-])
-
-eval_transforms = transforms.Compose([
-    transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32),
-])
-
-# Define the datasets used for training and validation
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
-#train_dataset = pdx.datasets.VOCDetection(
-#    data_dir='dataset',
-#    file_list='dataset/train_list.txt',
-#    label_list='dataset/labels.txt',
-#    transforms=train_transforms,
-#    num_workers=2,
-#    shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir='dataset',
-    file_list='dataset/val_list.txt',
-    label_list='dataset/labels.txt',
-    num_workers=2,
-    transforms=eval_transforms)
-
-# Initialize the model and start training
-# Training metrics can be viewed with VisualDL; see https://paddlex.readthedocs.io/zh_CN/develop/train/visualdl.html
-# num_classes must be set to the number of classes including the background class, i.e. number of target classes + 1
-#num_classes = len(train_dataset.labels) + 1
-#
-## API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
-#model = pdx.det.FasterRCNN(num_classes=num_classes, backbone='ResNet50_vd')
-#
-## API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
-## Parameter descriptions and tuning notes: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
-#model.train(
-#    num_epochs=36,
-#    train_dataset=train_dataset,
-#    train_batch_size=8,
-#    eval_dataset=eval_dataset,
-#    learning_rate=0.01,
-#    lr_decay_epochs=[24, 33],
-#    warmup_steps=1000,
-#    pretrain_weights='ResNet50_vd_ssld_pretrained',
-#    save_dir='output/guan_2',
-#    use_vdl=False)
-
-
-#eval_dataset = pdx.datasets.CocoDetection(
-#    data_dir='dataset_coco/JPEGImages',
-#    ann_file='dataset_coco/val.json',
-#    num_workers=2,
-#    transforms=eval_transforms)
-#model = pdx.load_model('output/guan_4/best_model/')
-#eval_details = model.evaluate(eval_dataset, batch_size=8, return_details=True)
-class MyEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, np.integer):
-            return int(obj)
-        elif isinstance(obj, np.floating):
-            return float(obj)
-        elif isinstance(obj, np.ndarray):
-            return obj.tolist()
-        else:
-            return super(MyEncoder, self).default(obj)
-
-
-with open('output/guan_4/best_model/eval_details.json', 'r') as f:
-    eval_details = json.load(f)
-json_path = 'output/guan_4/best_model/gt.json'
-json.dump(eval_details['gt'], open(json_path, "w"), indent=4, cls=MyEncoder)
-json_path = 'output/guan_4/best_model/bbox.json'
-json.dump(eval_details['bbox'], open(json_path, "w"), indent=4, cls=MyEncoder)
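
The MyEncoder definition in this script exists because the standard json module cannot serialize numpy scalars or arrays; the encoder downcasts them to native Python types first. A small self-contained demonstration of the problem it solves (NpEncoder is a renamed copy for illustration):

import json
import numpy as np

class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)          # e.g. np.int64 -> int
        if isinstance(obj, np.floating):
            return float(obj)        # e.g. np.float32 -> float
        if isinstance(obj, np.ndarray):
            return obj.tolist()      # arrays become plain lists
        return super(NpEncoder, self).default(obj)

record = {'score': np.float32(0.9), 'bbox': np.array([1, 2, 3, 4])}
# json.dumps(record) raises TypeError without a custom encoder
print(json.dumps(record, cls=NpEncoder))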

+ 0 - 63
tutorials/train/object_detection/guang_2_r2_dcn_libra.py

@@ -1,63 +0,0 @@
-# Environment variable configuration, used to control whether the GPU is used
-# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(), transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
-])
-
-eval_transforms = transforms.Compose([
-    transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32),
-])
-
-# Define the datasets used for training and validation
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
-train_dataset = pdx.datasets.VOCDetection(
-    data_dir='dataset',
-    file_list='dataset/train_list.txt',
-    label_list='dataset/labels.txt',
-    transforms=train_transforms,
-    num_workers=8,
-    shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir='dataset',
-    file_list='dataset/val_list.txt',
-    label_list='dataset/labels.txt',
-    num_workers=8,
-    transforms=eval_transforms)
-
-# Initialize the model and start training
-# Training metrics can be viewed with VisualDL; see https://paddlex.readthedocs.io/zh_CN/develop/train/visualdl.html
-# num_classes must be set to the number of classes including the background class, i.e. number of target classes + 1
-num_classes = len(train_dataset.labels) + 1
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
-model = pdx.det.FasterRCNN(
-    num_classes=num_classes,
-    backbone='ResNet50_vd',
-    with_dcn=True,
-    bbox_assigner='LibraBBoxAssigner')
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
-# Parameter descriptions and tuning notes: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
-model.train(
-    num_epochs=55,
-    train_dataset=train_dataset,
-    train_batch_size=8,
-    eval_dataset=eval_dataset,
-    learning_rate=0.01,
-    lr_decay_epochs=[40, 50],
-    warmup_start_lr=0.001,
-    pretrain_weights='ResNet50_vd_ssld_pretrained',
-    save_dir='output/guan_2_r3_dcn_libra',
-    use_vdl=False)

+ 0 - 63
tutorials/train/object_detection/guang_6.py

@@ -1,63 +0,0 @@
-# Environment variable configuration, used to control whether the GPU is used
-# Documentation: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html#gpu
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
-
-from paddlex.det import transforms
-import paddlex as pdx
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/transforms/det_transforms.html
-train_transforms = transforms.Compose([
-    transforms.RandomHorizontalFlip(), transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333), transforms.Padding(coarsest_stride=32)
-])
-
-eval_transforms = transforms.Compose([
-    transforms.Normalize(),
-    transforms.ResizeByShort(
-        short_size=800, max_size=1333),
-    transforms.Padding(coarsest_stride=32),
-])
-
-# Define the datasets used for training and validation
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/datasets.html#paddlex-datasets-vocdetection
-train_dataset = pdx.datasets.VOCDetection(
-    data_dir='dataset',
-    file_list='dataset/train_list.txt',
-    label_list='dataset/labels.txt',
-    transforms=train_transforms,
-    num_workers=8,
-    shuffle=True)
-eval_dataset = pdx.datasets.VOCDetection(
-    data_dir='dataset',
-    file_list='dataset/val_list.txt',
-    label_list='dataset/labels.txt',
-    num_workers=8,
-    transforms=eval_transforms)
-
-# Initialize the model and start training
-# Training metrics can be viewed with VisualDL; see https://paddlex.readthedocs.io/zh_CN/develop/train/visualdl.html
-# num_classes must be set to the number of classes including the background class, i.e. number of target classes + 1
-num_classes = len(train_dataset.labels) + 1
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#paddlex-det-fasterrcnn
-model = pdx.det.FasterRCNN(
-    num_classes=num_classes,
-    backbone='ResNet50_vd',
-    with_dcn=True,
-    rpn_cls_loss='SigmoidFocalLoss')
-
-# API reference: https://paddlex.readthedocs.io/zh_CN/develop/apis/models/detection.html#id1
-# Parameter descriptions and tuning notes: https://paddlex.readthedocs.io/zh_CN/develop/appendix/parameters.html
-model.train(
-    num_epochs=60,
-    train_dataset=train_dataset,
-    train_batch_size=8,
-    eval_dataset=eval_dataset,
-    learning_rate=0.01,
-    lr_decay_epochs=[48, 56],
-    warmup_steps=1000,
-    pretrain_weights='ResNet50_vd_ssld_pretrained',
-    save_dir='output/guan_6',
-    use_vdl=False)

+ 0 - 95
tutorials/train/object_detection/mv_train_img.py

@@ -1,95 +0,0 @@
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '4'
-import os.path as osp
-import re
-import cv2
-import xml.etree.ElementTree as ET
-import paddlex as pdx
-
-file_list = 'dataset/train_list.txt'
-save_dir = './visualize/train'
-data_dir = 'dataset/'
-if not os.path.exists(save_dir):
-    os.makedirs(save_dir)
-
-with open(file_list, 'r') as fr:
-    while True:
-        line = fr.readline()
-        if not line:
-            break
-        img_file, xml_file = [osp.join(data_dir, x) \
-                for x in line.strip().split()[:2]]
-        if 'budaodian' not in img_file and 'cahua' not in img_file and 'loudi' not in img_file and 'zangdian' not in img_file:
-            continue
-        tree = ET.parse(xml_file)
-        pattern = re.compile('<object>', re.IGNORECASE)
-        obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
-        if len(obj_match) == 0:
-            continue
-        obj_tag = obj_match[0][1:-1]
-        objs = tree.findall(obj_tag)
-        pattern = re.compile('<size>', re.IGNORECASE)
-        size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))[0][1:
-                                                                            -1]
-        size_element = tree.find(size_tag)
-        pattern = re.compile('<width>', re.IGNORECASE)
-        width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                           -1]
-        im_w = float(size_element.find(width_tag).text)
-        pattern = re.compile('<height>', re.IGNORECASE)
-        height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                            -1]
-        im_h = float(size_element.find(height_tag).text)
-        gt_bbox = []
-        gt_class = []
-        for i, obj in enumerate(objs):
-            pattern = re.compile('<name>', re.IGNORECASE)
-            name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            cname = obj.find(name_tag).text.strip()
-            gt_class.append(cname)
-            pattern = re.compile('<difficult>', re.IGNORECASE)
-            diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            try:
-                _difficult = int(obj.find(diff_tag).text)
-            except Exception:
-                _difficult = 0
-            pattern = re.compile('<bndbox>', re.IGNORECASE)
-            box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            box_element = obj.find(box_tag)
-            pattern = re.compile('<xmin>', re.IGNORECASE)
-            xmin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x1 = float(box_element.find(xmin_tag).text)
-            pattern = re.compile('<ymin>', re.IGNORECASE)
-            ymin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y1 = float(box_element.find(ymin_tag).text)
-            pattern = re.compile('<xmax>', re.IGNORECASE)
-            xmax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x2 = float(box_element.find(xmax_tag).text)
-            pattern = re.compile('<ymax>', re.IGNORECASE)
-            ymax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y2 = float(box_element.find(ymax_tag).text)
-            x1 = max(0, x1)
-            y1 = max(0, y1)
-            if im_w > 0.5 and im_h > 0.5:
-                x2 = min(im_w - 1, x2)
-                y2 = min(im_h - 1, y2)
-            gt_bbox.append([x1, y1, x2, y2])
-        gts = []
-        for bbox, name in zip(gt_bbox, gt_class):
-            x1, y1, x2, y2 = bbox
-            w = x2 - x1 + 1
-            h = y2 - y1 + 1
-            gt = {
-                'category_id': 0,
-                'category': name,
-                'bbox': [x1, y1, w, h],
-                'score': 1
-            }
-            gts.append(gt)
-        gt_vis = pdx.det.visualize(img_file, gts, threshold=0.1, save_dir=None)
-        cv2.imwrite(
-            os.path.join(save_dir, os.path.split(img_file)[-1]), gt_vis)

+ 0 - 99
tutorials/train/object_detection/predict.py

@@ -1,99 +0,0 @@
-import os
-os.environ['CUDA_VISIBLE_DEVICES'] = '4'
-import os.path as osp
-import cv2
-import re
-import xml.etree.ElementTree as ET
-import paddlex as pdx
-
-model_dir = 'output/guan_2/best_model/'
-file_list = 'dataset/val_list.txt'
-data_dir = 'dataset/'
-save_dir = './visualize/guan_2'
-if not os.path.exists(save_dir):
-    os.makedirs(save_dir)
-
-model = pdx.load_model(model_dir)
-with open(file_list, 'r') as fr:
-    while True:
-        line = fr.readline()
-        if not line:
-            break
-        img_file, xml_file = [osp.join(data_dir, x) \
-                for x in line.strip().split()[:2]]
-        res = model.predict(img_file)
-        det_vis = pdx.det.visualize(
-            img_file, res, threshold=0.1, save_dir=None)
-
-        tree = ET.parse(xml_file)
-        pattern = re.compile('<object>', re.IGNORECASE)
-        obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
-        if len(obj_match) == 0:
-            continue
-        obj_tag = obj_match[0][1:-1]
-        objs = tree.findall(obj_tag)
-        pattern = re.compile('<size>', re.IGNORECASE)
-        size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))[0][1:
-                                                                            -1]
-        size_element = tree.find(size_tag)
-        pattern = re.compile('<width>', re.IGNORECASE)
-        width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                           -1]
-        im_w = float(size_element.find(width_tag).text)
-        pattern = re.compile('<height>', re.IGNORECASE)
-        height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][1:
-                                                                            -1]
-        im_h = float(size_element.find(height_tag).text)
-        gt_bbox = []
-        gt_class = []
-        for i, obj in enumerate(objs):
-            pattern = re.compile('<name>', re.IGNORECASE)
-            name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            cname = obj.find(name_tag).text.strip()
-            gt_class.append(cname)
-            pattern = re.compile('<difficult>', re.IGNORECASE)
-            diff_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            try:
-                _difficult = int(obj.find(diff_tag).text)
-            except Exception:
-                _difficult = 0
-            pattern = re.compile('<bndbox>', re.IGNORECASE)
-            box_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
-            box_element = obj.find(box_tag)
-            pattern = re.compile('<xmin>', re.IGNORECASE)
-            xmin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x1 = float(box_element.find(xmin_tag).text)
-            pattern = re.compile('<ymin>', re.IGNORECASE)
-            ymin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y1 = float(box_element.find(ymin_tag).text)
-            pattern = re.compile('<xmax>', re.IGNORECASE)
-            xmax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            x2 = float(box_element.find(xmax_tag).text)
-            pattern = re.compile('<ymax>', re.IGNORECASE)
-            ymax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][
-                1:-1]
-            y2 = float(box_element.find(ymax_tag).text)
-            x1 = max(0, x1)
-            y1 = max(0, y1)
-            if im_w > 0.5 and im_h > 0.5:
-                x2 = min(im_w - 1, x2)
-                y2 = min(im_h - 1, y2)
-            gt_bbox.append([x1, y1, x2, y2])
-        gts = []
-        for bbox, name in zip(gt_bbox, gt_class):
-            x1, y1, x2, y2 = bbox
-            w = x2 - x1 + 1
-            h = y2 - y1 + 1
-            gt = {
-                'category_id': 0,
-                'category': name,
-                'bbox': [x1, y1, w, h],
-                'score': 1
-            }
-            gts.append(gt)
-        gt_vis = pdx.det.visualize(img_file, gts, threshold=0.1, save_dir=None)
-        vis = cv2.hconcat([det_vis, gt_vis])
-        cv2.imwrite(os.path.join(save_dir, os.path.split(img_file)[-1]), vis)