5 gadi atpakaļ · 0d004626e2
--- a/paddlex/cv/datasets/coco.py
+++ b/paddlex/cv/datasets/coco.py
@@ -128,7 +128,6 @@ class CocoDetection(VOCDetection):
 
				 
			
 
				             coco_rec = (im_info, label_info)
			
 
				             self.file_list.append([im_fname, coco_rec])
			
 
				-
			
 
				         if not len(self.file_list) > 0:
			
 
				             raise Exception('not found any coco record in %s' % (ann_file))
			
 
				         logging.info("{} samples in file {}".format(
			
--- a/paddlex/cv/datasets/voc.py
+++ b/paddlex/cv/datasets/voc.py
@@ -14,6 +14,7 @@
 
				 
			
 
				 from __future__ import absolute_import
			
 
				 import copy
			
 
				+import os
			
 
				 import os.path as osp
			
 
				 import random
			
 
				 import numpy as np
			
@@ -170,6 +171,43 @@ class VOCDetection(Dataset):
 
				         self.coco_gt.dataset = annotations
			
 
				         self.coco_gt.createIndex()
			
 
				 
			
 
				+    def append_backgrounds(self, image_dir):
			
 
				+        import cv2
			
 
				+        if not osp.exists(image_dir):
			
 
				+           raise Exception("{} background images directory does not exist.".format(image_dir))
			
 
				+        image_list = os.listdir(image_dir)
			
 
				+        max_img_id = max(self.coco_gt.getImgIds())
			
 
				+        for image in image_list:
			
 
				+            if not is_pic(image):
			
 
				+                continue
			
 
				+            # False ground truth
			
 
				+            gt_bbox = np.array([[0, 0, 1e-05, 1e-05]], dtype=np.float32)
			
 
				+            gt_class = np.array([[0]], dtype=np.int32)
			
 
				+            gt_score = np.ones((1, 1), dtype=np.float32)
			
 
				+            is_crowd = np.array([[0]], dtype=np.int32)
			
 
				+            difficult = np.zeros((1, 1), dtype=np.int32)
			
 
				+            gt_poly = [[[0, 0, 0, 1e-05, 1e-05, 1e-05, 1e-05, 0]]]
			
 
				+            
			
 
				+            max_img_id += 1
			
 
				+            im_fname = osp.join(image_dir, image)
			
 
				+            img_data = cv2.imread(im_fname)
			
 
				+            im_h, im_w, im_c = img_data.shape
			
 
				+            im_info = {
			
 
				+                'im_id': np.array([max_img_id]).astype('int32'),
			
 
				+                'image_shape': np.array([im_h, im_w]).astype('int32'),
			
 
				+            }
			
 
				+            label_info = {
			
 
				+                'is_crowd': is_crowd,
			
 
				+                'gt_class': gt_class,
			
 
				+                'gt_bbox': gt_bbox,
			
 
				+                'gt_score': gt_score,
			
 
				+                'difficult': difficult,
			
 
				+                'gt_poly': gt_poly
			
 
				+            }
			
 
				+            coco_rec = (im_info, label_info)
			
 
				+            self.file_list.append([im_fname, coco_rec])
			
 
				+        self.num_samples = len(self.file_list)
			
 
				+
			
 
				     def iterator(self):
			
 
				         self._epoch += 1
			
 
				         self._pos = 0
			
--- a/paddlex/cv/nets/detection/mask_rcnn.py
+++ b/paddlex/cv/nets/detection/mask_rcnn.py
@@ -204,7 +204,7 @@ class MaskRCNN(object):
 
				                 bg_thresh_hi=self.bg_thresh_hi,
			
 
				                 bg_thresh_lo=self.bg_thresh_lo,
			
 
				                 bbox_reg_weights=self.bbox_reg_weights,
			
 
				-                calass_nums=self.num_classes,
			
 
				+                class_nums=self.num_classes,
			
 
				                 use_random=self.rpn_head.use_random)
			
 
				 
			
 
				             rois = outputs[0]
			
--- a/paddlex/cv/transforms/det_transforms.py
+++ b/paddlex/cv/transforms/det_transforms.py
@@ -722,22 +722,38 @@ class MixupImage(DetTransform):
 
				                             'Becasuse gt_bbox/gt_class/gt_score is not in label_info!')
			
 
				         gt_bbox1 = label_info['gt_bbox']
			
 
				         gt_bbox2 = im_info['mixup'][2]['gt_bbox']
			
 
				-        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
			
 
				         gt_class1 = label_info['gt_class']
			
 
				         gt_class2 = im_info['mixup'][2]['gt_class']
			
 
				-        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
			
 
				-
			
 
				         gt_score1 = label_info['gt_score']
			
 
				         gt_score2 = im_info['mixup'][2]['gt_score']
			
 
				-        gt_score = np.concatenate(
			
 
				-            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
			
 
				         if 'gt_poly' in label_info:
			
 
				             gt_poly1 = label_info['gt_poly']
			
 
				             gt_poly2 = im_info['mixup'][2]['gt_poly']
			
 
				-            label_info['gt_poly'] = gt_poly1 + gt_poly2
			
 
				         is_crowd1 = label_info['is_crowd']
			
 
				         is_crowd2 = im_info['mixup'][2]['is_crowd']
			
 
				-        is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
			
 
				+        
			
 
				+        if 0 not in gt_class1 and 0 not in gt_class2:
			
 
				+            gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
			
 
				+            gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
			
 
				+            gt_score = np.concatenate(
			
 
				+                (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
			
 
				+            if 'gt_poly' in label_info:
			
 
				+                label_info['gt_poly'] = gt_poly1 + gt_poly2
			
 
				+            is_crowd = np.concatenate((is_crowd1, is_crowd2), axis=0)
			
 
				+        elif 0 in gt_class1:
			
 
				+            gt_bbox = gt_bbox2
			
 
				+            gt_class = gt_class2
			
 
				+            gt_score = gt_score2 * (1. - factor)
			
 
				+            if 'gt_poly' in label_info:
			
 
				+                label_info['gt_poly'] = gt_poly2
			
 
				+            is_crowd = is_crowd2
			
 
				+        else:
			
 
				+            gt_bbox = gt_bbox1
			
 
				+            gt_class = gt_class1
			
 
				+            gt_score = gt_score1 * factor
			
 
				+            if 'gt_poly' in label_info:
			
 
				+                label_info['gt_poly'] = gt_poly1
			
 
				+            is_crowd = is_crowd1
			
 
				         label_info['gt_bbox'] = gt_bbox
			
 
				         label_info['gt_score'] = gt_score
			
 
				         label_info['gt_class'] = gt_class
			
@@ -809,6 +825,8 @@ class RandomExpand(DetTransform):
 
				         if np.random.uniform(0., 1.) < self.prob:
			
 
				             return (im, im_info, label_info)
			
 
				 
			
 
				+        if 'gt_class' in label_info and 0 in label_info['gt_class']:
			
 
				+            return (im, im_info, label_info)
			
 
				         image_shape = im_info['image_shape']
			
 
				         height = int(image_shape[0])
			
 
				         width = int(image_shape[1])
			
@@ -904,6 +922,8 @@ class RandomCrop(DetTransform):
 
				 
			
 
				         if len(label_info['gt_bbox']) == 0:
			
 
				             return (im, im_info, label_info)
			
 
				+        if 'gt_class' in label_info and 0 in label_info['gt_class']:
			
 
				+            return (im, im_info, label_info)
			
 
				 
			
 
				         image_shape = im_info['image_shape']
			
 
				         w = image_shape[1]
			
@@ -1199,9 +1219,10 @@ class ArrangeYOLOv3(DetTransform):
 
				             if gt_num > 0:
			
 
				                 label_info['gt_class'][:gt_num, 0] = label_info[
			
 
				                     'gt_class'][:gt_num, 0] - 1
			
 
				-                gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
			
 
				-                gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
			
 
				-                gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
			
 
				+                if -1 not in label_info['gt_class']:
			
 
				+                    gt_bbox[:gt_num, :] = label_info['gt_bbox'][:gt_num, :]
			
 
				+                    gt_class[:gt_num] = label_info['gt_class'][:gt_num, 0]
			
 
				+                    gt_score[:gt_num] = label_info['gt_score'][:gt_num, 0]
			
 
				             # parse [x1, y1, x2, y2] to [x, y, w, h]
			
 
				             gt_bbox[:, 2:4] = gt_bbox[:, 2:4] - gt_bbox[:, :2]
			
 
				             gt_bbox[:, :2] = gt_bbox[:, :2] + gt_bbox[:, 2:4] / 2.