소스 검색

Merge pull request #103 from FlyingQianMM/develop_draw

add box format in prediction
Jason 5 년 전
부모
커밋
7acf4f36ea
5개의 변경된 파일, 73개의 추가 그리고 77개의 삭제
  1. 2 2
      deploy/cpp/demo/detector.cpp
  2. 1 1
      docs/apis/models/instance_segmentation.md
  3. 24 25
      paddlex/cv/models/faster_rcnn.py
  4. 31 31
      paddlex/cv/models/mask_rcnn.py
  5. 15 18
      paddlex/cv/models/yolo_v3.py

+ 2 - 2
deploy/cpp/demo/detector.cpp

@@ -66,7 +66,7 @@ int main(int argc, char** argv) {
         std::cout << "image file: " << image_path
                   << ", predict label: " << result.boxes[i].category
                   << ", label_id:" << result.boxes[i].category_id
-                  << ", score: " << result.boxes[i].score << ", box:("
+                  << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
                   << result.boxes[i].coordinate[0] << ", "
                   << result.boxes[i].coordinate[1] << ", "
                   << result.boxes[i].coordinate[2] << ", "
@@ -89,7 +89,7 @@ int main(int argc, char** argv) {
     for (int i = 0; i < result.boxes.size(); ++i) {
       std::cout << ", predict label: " << result.boxes[i].category
                 << ", label_id:" << result.boxes[i].category_id
-                << ", score: " << result.boxes[i].score << ", box:("
+                << ", score: " << result.boxes[i].score << ", box(xmin, ymin, w, h):("
                 << result.boxes[i].coordinate[0] << ", "
                 << result.boxes[i].coordinate[1] << ", "
                 << result.boxes[i].coordinate[2] << ", "

+ 1 - 1
docs/apis/models/instance_segmentation.md

@@ -82,4 +82,4 @@ predict(self, img_file, transforms=None)
 >
 > **返回值**
 >
-> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。
+> > - **list**: 预测结果列表,列表中每个元素均为一个dict,key'bbox', 'mask', 'category', 'category_id', 'score',分别表示每个预测目标的框坐标信息、Mask信息,类别、类别id、置信度其中框坐标信息为[xmin, ymin, w, h],即左上角x, y坐标和框的宽和高。Mask信息为原图大小的二值图,1表示像素点属于预测类别,0表示像素点是背景。

+ 24 - 25
paddlex/cv/models/faster_rcnn.py

@@ -117,12 +117,12 @@ class FasterRCNN(BaseAPI):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -310,11 +310,10 @@ class FasterRCNN(BaseAPI):
                 'im_info': im_infos,
                 'im_shape': im_shapes,
             }
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths())
@@ -339,13 +338,13 @@ class FasterRCNN(BaseAPI):
                 res['is_difficult'] = (np.array(res_is_difficult),
                                        [res_is_difficult_lod])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         box_ap_stats, eval_details = eval_results(
             results, metric, eval_dataset.coco_gt, with_background=True)
         metrics = OrderedDict(
-            zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-                box_ap_stats))
+            zip(['bbox_mmap'
+                 if metric == 'COCO' else 'bbox_map'], box_ap_stats))
         if return_details:
             return metrics, eval_details
         return metrics
@@ -359,7 +358,8 @@ class FasterRCNN(BaseAPI):
 
         Returns:
             list: 预测结果列表,每个预测结果由预测框类别标签、
-              预测框类别名称、预测框坐标、预测框得分组成。
+              预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
+              预测框得分组成。
         """
         if transforms is None and not hasattr(self, 'test_transforms'):
             raise Exception("transforms need to be defined, now is None.")
@@ -373,15 +373,14 @@ class FasterRCNN(BaseAPI):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)

+ 31 - 31
paddlex/cv/models/mask_rcnn.py

@@ -81,13 +81,13 @@ class MaskRCNN(FasterRCNN):
             model_out = model.build_net(inputs)
             loss = model_out['loss']
             self.optimizer.minimize(loss)
-            outputs = OrderedDict([('loss', model_out['loss']),
-                                   ('loss_cls', model_out['loss_cls']),
-                                   ('loss_bbox', model_out['loss_bbox']),
-                                   ('loss_mask', model_out['loss_mask']),
-                                   ('loss_rpn_cls', model_out['loss_rpn_cls']),
-                                   ('loss_rpn_bbox',
-                                    model_out['loss_rpn_bbox'])])
+            outputs = OrderedDict(
+                [('loss', model_out['loss']),
+                 ('loss_cls', model_out['loss_cls']),
+                 ('loss_bbox', model_out['loss_bbox']),
+                 ('loss_mask', model_out['loss_mask']),
+                 ('loss_rpn_cls', model_out['loss_rpn_cls']), (
+                     'loss_rpn_bbox', model_out['loss_rpn_bbox'])])
         else:
             outputs = model.build_net(inputs)
         return inputs, outputs
@@ -276,11 +276,10 @@ class MaskRCNN(FasterRCNN):
                 'im_info': im_infos,
                 'im_shape': im_shapes,
             }
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths()),
@@ -292,8 +291,8 @@ class MaskRCNN(FasterRCNN):
             res['im_shape'] = (im_shapes, [])
             res['im_id'] = (np.array(res_im_id), [])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
 
         ap_stats, eval_details = eval_results(
             results,
@@ -302,8 +301,8 @@ class MaskRCNN(FasterRCNN):
             with_background=True,
             resolution=self.mask_head_resolution)
         if metric == 'VOC':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'],
                         [ap_stats[0][1], ap_stats[1][1]]))
@@ -311,8 +310,8 @@ class MaskRCNN(FasterRCNN):
                 metrics = OrderedDict(
                     zip(['bbox_map', 'segm_map'], [0.0, 0.0]))
         elif metric == 'COCO':
-            if isinstance(ap_stats[0], np.ndarray) and isinstance(
-                    ap_stats[1], np.ndarray):
+            if isinstance(ap_stats[0], np.ndarray) and isinstance(ap_stats[1],
+                                                                  np.ndarray):
                 metrics = OrderedDict(
                     zip(['bbox_mmap', 'segm_mmap'],
                         [ap_stats[0][0], ap_stats[1][0]]))
@@ -331,8 +330,10 @@ class MaskRCNN(FasterRCNN):
             transforms (paddlex.det.transforms): 数据预处理操作。
 
         Returns:
-            dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、预测框坐标、预测框内的二值图、
-                预测框得分组成。
+            dict: 预测结果列表,每个预测结果由预测框类别标签、预测框类别名称、
+                  预测框坐标(坐标格式为[xmin, ymin, w, h])、
+                  原图大小的预测二值图(1表示预测框类别,0表示背景类)、
+                  预测框得分组成。
         """
         if transforms is None and not hasattr(self, 'test_transforms'):
             raise Exception("transforms need to be defined, now is None.")
@@ -346,15 +347,14 @@ class MaskRCNN(FasterRCNN):
         im = np.expand_dims(im, axis=0)
         im_resize_info = np.expand_dims(im_resize_info, axis=0)
         im_shape = np.expand_dims(im_shape, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_info': im_resize_info,
-                'im_shape': im_shape
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={
+                                   'image': im,
+                                   'im_info': im_resize_info,
+                                   'im_shape': im_shape
+                               },
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)
@@ -368,8 +368,8 @@ class MaskRCNN(FasterRCNN):
         import pycocotools.mask as mask_util
         for index, xywh_res in enumerate(xywh_results):
             del xywh_res['image_id']
-            xywh_res['mask'] = mask_util.decode(
-                segm_results[index]['segmentation'])
+            xywh_res['mask'] = mask_util.decode(segm_results[index][
+                'segmentation'])
             xywh_res['category'] = self.labels[xywh_res['category_id']]
             results.append(xywh_res)
         return results

+ 15 - 18
paddlex/cv/models/yolo_v3.py

@@ -306,11 +306,10 @@ class YOLOv3(BaseAPI):
             images = np.array([d[0] for d in data])
             im_sizes = np.array([d[1] for d in data])
             feed_data = {'image': images, 'im_size': im_sizes}
-            outputs = self.exe.run(
-                self.test_prog,
-                feed=[feed_data],
-                fetch_list=list(self.test_outputs.values()),
-                return_numpy=False)
+            outputs = self.exe.run(self.test_prog,
+                                   feed=[feed_data],
+                                   fetch_list=list(self.test_outputs.values()),
+                                   return_numpy=False)
             res = {
                 'bbox': (np.array(outputs[0]),
                          outputs[0].recursive_sequence_lengths())
@@ -326,13 +325,13 @@ class YOLOv3(BaseAPI):
                 res['gt_label'] = (res_gt_label, [])
                 res['is_difficult'] = (res_is_difficult, [])
             results.append(res)
-            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(
-                epoch_id, step + 1, total_steps))
+            logging.debug("[EVAL] Epoch={}, Step={}/{}".format(epoch_id, step +
+                                                               1, total_steps))
         box_ap_stats, eval_details = eval_results(
             results, metric, eval_dataset.coco_gt, with_background=False)
         evaluate_metrics = OrderedDict(
-            zip(['bbox_mmap' if metric == 'COCO' else 'bbox_map'],
-                box_ap_stats))
+            zip(['bbox_mmap'
+                 if metric == 'COCO' else 'bbox_map'], box_ap_stats))
         if return_details:
             return evaluate_metrics, eval_details
         return evaluate_metrics
@@ -346,7 +345,8 @@ class YOLOv3(BaseAPI):
 
         Returns:
             list: 预测结果列表,每个预测结果由预测框类别标签、
-              预测框类别名称、预测框坐标、预测框得分组成。
+              预测框类别名称、预测框坐标(坐标格式为[xmin, ymin, w, h])、
+              预测框得分组成。
         """
         if transforms is None and not hasattr(self, 'test_transforms'):
             raise Exception("transforms need to be defined, now is None.")
@@ -359,14 +359,11 @@ class YOLOv3(BaseAPI):
             im, im_size = self.test_transforms(img_file)
         im = np.expand_dims(im, axis=0)
         im_size = np.expand_dims(im_size, axis=0)
-        outputs = self.exe.run(
-            self.test_prog,
-            feed={
-                'image': im,
-                'im_size': im_size
-            },
-            fetch_list=list(self.test_outputs.values()),
-            return_numpy=False)
+        outputs = self.exe.run(self.test_prog,
+                               feed={'image': im,
+                                     'im_size': im_size},
+                               fetch_list=list(self.test_outputs.values()),
+                               return_numpy=False)
         res = {
             k: (np.array(v), v.recursive_sequence_lengths())
             for k, v in zip(list(self.test_outputs.keys()), outputs)