will-jl944 4 жил өмнө
parent
commit
d4f00e5ab0

+ 1 - 1
PaddleDetection

@@ -1 +1 @@
-Subproject commit 56ed45694cd9fc83efcadf01fc7dc328a8eeb1c0
+Subproject commit d41b085d66946219351d8d99c2b3d95756489624

+ 8 - 13
paddlex/ppdet/data/source/keypoint_coco.py

@@ -11,7 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+"""
+this code is based on https://github.com/open-mmlab/mmpose
+"""
 import os
 import cv2
 import numpy as np
@@ -25,8 +27,7 @@ from paddlex.ppdet.core.workspace import register, serializable
 
 @serializable
 class KeypointBottomUpBaseDataset(DetDataset):
-    """Base class for bottom-up datasets. Adapted from
-        https://github.com/open-mmlab/mmpose
+    """Base class for bottom-up datasets.
 
     All datasets should subclass it.
     All subclasses should overwrite:
@@ -90,8 +91,7 @@ class KeypointBottomUpBaseDataset(DetDataset):
 @register
 @serializable
 class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
-    """COCO dataset for bottom-up pose estimation. Adapted from
-        https://github.com/open-mmlab/mmpose
+    """COCO dataset for bottom-up pose estimation.
 
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
@@ -262,8 +262,7 @@ class KeypointBottomUpCocoDataset(KeypointBottomUpBaseDataset):
 @register
 @serializable
 class KeypointBottomUpCrowdPoseDataset(KeypointBottomUpCocoDataset):
-    """CrowdPose dataset for bottom-up pose estimation. Adapted from
-        https://github.com/open-mmlab/mmpose
+    """CrowdPose dataset for bottom-up pose estimation.
 
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
@@ -387,9 +386,7 @@ class KeypointTopDownBaseDataset(DetDataset):
 @register
 @serializable
 class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
-    """COCO dataset for top-down pose estimation. Adapted from
-        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
-        Copyright (c) Microsoft, under the MIT License.
+    """COCO dataset for top-down pose estimation.
 
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.
@@ -582,9 +579,7 @@ class KeypointTopDownCocoDataset(KeypointTopDownBaseDataset):
 @register
 @serializable
 class KeypointTopDownMPIIDataset(KeypointTopDownBaseDataset):
-    """MPII dataset for topdown pose estimation. Adapted from
-        https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
-        Copyright (c) Microsoft, under the MIT License.
+    """MPII dataset for topdown pose estimation.
 
     The dataset loads raw features and apply specified transforms
     to return a dict containing the image tensors and other information.

+ 13 - 3
paddlex/ppdet/data/transform/batch_operators.py

@@ -16,6 +16,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import typing
+
 try:
     from collections.abc import Sequence
 except Exception:
@@ -69,15 +71,23 @@ class PadBatch(BaseOperator):
         """
         coarsest_stride = self.pad_to_stride
 
-        max_shape = np.array([data['image'].shape for data in samples]).max(
-            axis=0)
+        # multi-scale input is a nested list
+        if isinstance(samples,
+                      typing.Sequence) and len(samples) > 0 and isinstance(
+                          samples[0], typing.Sequence):
+            inner_samples = samples[0]
+        else:
+            inner_samples = samples
+
+        max_shape = np.array(
+            [data['image'].shape for data in inner_samples]).max(axis=0)
         if coarsest_stride > 0:
             max_shape[1] = int(
                 np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
             max_shape[2] = int(
                 np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
 
-        for data in samples:
+        for data in inner_samples:
             im = data['image']
             im_c, im_h, im_w = im.shape[:]
             padding_im = np.zeros(

+ 8 - 1
paddlex/ppdet/data/transform/keypoint_operators.py

@@ -684,6 +684,10 @@ class ToHeatmapsTopDown(object):
         self.sigma = sigma
 
     def __call__(self, records):
+        """refer to
+            https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
+            Copyright (c) Microsoft, under the MIT License.
+        """
         joints = records['joints']
         joints_vis = records['joints_vis']
         num_joints = joints.shape[0]
@@ -792,7 +796,10 @@ class ToHeatmapsTopDown_DARK(object):
 
 @register_keypointop
 class ToHeatmapsTopDown_UDP(object):
-    """to generate the gaussian heatmaps of keypoint for heatmap loss.
+    """This code is based on:
+        https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
+
+        to generate the gaussian heatmaps of keypoint for heatmap loss.
         ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing
         for Human Pose Estimation (CVPR 2020).
 

+ 1 - 0
paddlex/ppdet/engine/export_utils.py

@@ -46,6 +46,7 @@ TRT_MIN_SUBGRAPH = {
     'GFL': 16,
     'PicoDet': 3,
     'CenterNet': 5,
+    'TOOD': 5,
 }
 
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']

+ 2 - 0
paddlex/ppdet/engine/tracker.py

@@ -19,8 +19,10 @@ from __future__ import print_function
 import os
 import cv2
 import glob
+import re
 import paddle
 import numpy as np
+import os.path as osp
 from collections import defaultdict
 
 from paddlex.ppdet.core.workspace import create

+ 10 - 2
paddlex/ppdet/engine/trainer.py

@@ -22,6 +22,7 @@ import copy
 import time
 
 import numpy as np
+import typing
 from PIL import Image, ImageOps
 
 import paddle
@@ -473,7 +474,11 @@ class Trainer(object):
             for metric in self._metrics:
                 metric.update(data, outs)
 
-            sample_num += data['im_id'].numpy().shape[0]
+            # multi-scale inputs: all inputs have same im_id
+            if isinstance(data, typing.Sequence):
+                sample_num += data[0]['im_id'].numpy().shape[0]
+            else:
+                sample_num += data['im_id'].numpy().shape[0]
             self._compose_callback.on_step_end(self.status)
 
         self.status['sample_num'] = sample_num
@@ -517,7 +522,10 @@ class Trainer(object):
             outs = self.model(data)
 
             for key in ['im_shape', 'scale_factor', 'im_id']:
-                outs[key] = data[key]
+                if isinstance(data, typing.Sequence):
+                    outs[key] = data[0][key]
+                else:
+                    outs[key] = data[key]
             for key, value in outs.items():
                 if hasattr(value, 'numpy'):
                     outs[key] = value.numpy()

+ 3 - 4
paddlex/ppdet/metrics/keypoint_metrics.py

@@ -27,11 +27,10 @@ __all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
 
 
 class KeyPointTopDownCOCOEval(object):
-    '''
-    Adapted from
+    """refer to
         https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
         Copyright (c) Microsoft, under the MIT License.
-    '''
+    """
 
     def __init__(self,
                  anno_file,
@@ -286,7 +285,7 @@ class KeyPointTopDownMPIIEval(object):
         return self.eval_results
 
     def evaluate(self, outputs, savepath=None):
-        """Evaluate PCKh for MPII dataset. Adapted from
+        """Evaluate PCKh for MPII dataset. refer to
         https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
         Copyright (c) Microsoft, under the MIT License.
 

+ 6 - 1
paddlex/ppdet/metrics/metrics.py

@@ -21,6 +21,7 @@ import sys
 import json
 import paddle
 import numpy as np
+import typing
 
 from .map_utils import prune_zero_padding, DetectionMAP
 from .coco_utils import get_infer_results, cocoapi_eval
@@ -93,7 +94,11 @@ class COCOMetric(Metric):
         for k, v in outputs.items():
             outs[k] = v.numpy() if isinstance(v, paddle.Tensor) else v
 
-        im_id = inputs['im_id']
+        # multi-scale inputs: all inputs have same im_id
+        if isinstance(inputs, typing.Sequence):
+            im_id = inputs[0]['im_id']
+        else:
+            im_id = inputs['im_id']
         outs['im_id'] = im_id.numpy() if isinstance(im_id,
                                                     paddle.Tensor) else im_id
 

+ 3 - 3
paddlex/ppdet/model_zoo/tests/test_get_model.py

@@ -18,8 +18,8 @@ from __future__ import print_function
 
 import os
 import paddle
+import paddlex.ppdet
 import unittest
-import paddlex
 
 # NOTE: weights downloading costs time, we choose
 #       a small model for unittesting
@@ -29,7 +29,7 @@ MODEL_NAME = 'ppyolo/ppyolo_tiny_650e_coco'
 class TestGetConfigFile(unittest.TestCase):
     def test_main(self):
         try:
-            cfg_file = paddlex.ppdet.model_zoo.get_config_file(MODEL_NAME)
+            cfg_file = ppdet.model_zoo.get_config_file(MODEL_NAME)
             assert os.path.isfile(cfg_file)
         except:
             self.assertTrue(False)
@@ -38,7 +38,7 @@ class TestGetConfigFile(unittest.TestCase):
 class TestGetModel(unittest.TestCase):
     def test_main(self):
         try:
-            model = paddlex.ppdet.model_zoo.get_model(MODEL_NAME)
+            model = ppdet.model_zoo.get_model(MODEL_NAME)
             assert isinstance(model, paddle.nn.Layer)
         except:
             self.assertTrue(False)

+ 3 - 3
paddlex/ppdet/model_zoo/tests/test_list_model.py

@@ -17,7 +17,7 @@ from __future__ import division
 from __future__ import print_function
 
 import unittest
-import paddlex
+import paddlex.ppdet
 
 
 class TestListModel(unittest.TestCase):
@@ -26,7 +26,7 @@ class TestListModel(unittest.TestCase):
 
     def test_main(self):
         try:
-            paddlex.ppdet.model_zoo.list_model(self._filter)
+            ppdet.model_zoo.list_model(self._filter)
             self.assertTrue(True)
         except:
             self.assertTrue(False)
@@ -58,7 +58,7 @@ class TestListModelError(unittest.TestCase):
 
     def test_main(self):
         try:
-            paddlex.ppdet.model_zoo.list_model(self._filter)
+            ppdet.model_zoo.list_model(self._filter)
             self.assertTrue(False)
         except ValueError:
             self.assertTrue(True)

+ 56 - 1
paddlex/ppdet/modeling/architectures/meta_arch.py

@@ -2,9 +2,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
 import paddle
 import paddle.nn as nn
+import typing
+
 from paddlex.ppdet.core.workspace import register
+from paddlex.ppdet.modeling.post_process import nms
 
 __all__ = ['BaseArch']
 
@@ -54,7 +58,58 @@ class BaseArch(nn.Layer):
         if self.training:
             out = self.get_loss()
         else:
-            out = self.get_pred()
+            inputs_list = []
+            # multi-scale input
+            if not isinstance(inputs, typing.Sequence):
+                inputs_list.append(inputs)
+            else:
+                inputs_list.extend(inputs)
+
+            outs = []
+            for inp in inputs_list:
+                self.inputs = inp
+                outs.append(self.get_pred())
+
+            # multi-scale test
+            if len(outs) > 1:
+                out = self.merge_multi_scale_predictions(outs)
+            else:
+                out = outs[0]
+        return out
+
+    def merge_multi_scale_predictions(self, outs):
+        # default values for architectures not included in following list
+        num_classes = 80
+        nms_threshold = 0.5
+        keep_top_k = 100
+
+        if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'
+                                       ):
+            num_classes = self.bbox_head.num_classes
+            keep_top_k = self.bbox_post_process.nms.keep_top_k
+            nms_threshold = self.bbox_post_process.nms.nms_threshold
+        else:
+            raise Exception(
+                "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
+            )
+
+        final_boxes = []
+        all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
+        for c in range(num_classes):
+            idxs = all_scale_outs[:, 0] == c
+            if np.count_nonzero(idxs) == 0:
+                continue
+            r = nms(all_scale_outs[idxs, 1:], nms_threshold)
+            final_boxes.append(
+                np.concatenate([np.full((r.shape[0], 1), c), r], 1))
+        out = np.concatenate(final_boxes)
+        out = np.concatenate(sorted(
+            out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
+        out = {
+            'bbox': paddle.to_tensor(out),
+            'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
+        }
+
         return out
 
     def build_inputs(self, data, input_def):

+ 45 - 0
paddlex/ppdet/modeling/assigners/utils.py

@@ -19,6 +19,12 @@ from __future__ import print_function
 import paddle
 import paddle.nn.functional as F
 
+__all__ = [
+    'pad_gt', 'gather_topk_anchors', 'check_points_inside_bboxes',
+    'compute_max_iou_anchor', 'compute_max_iou_gt',
+    'generate_anchors_for_grid_cell'
+]
+
 
 def pad_gt(gt_labels, gt_bboxes, gt_scores=None):
     r""" Pad 0 in gt_labels and gt_bboxes.
@@ -148,3 +154,42 @@ def compute_max_iou_gt(ious):
     max_iou_index = ious.argmax(axis=-1)
     is_max_iou = F.one_hot(max_iou_index, num_anchors)
     return is_max_iou.astype(ious.dtype)
+
+
+def generate_anchors_for_grid_cell(feats,
+                                   fpn_strides,
+                                   grid_cell_size=5.0,
+                                   grid_cell_offset=0.5):
+    r"""
+    Like ATSS, generate anchors based on grid size.
+    Args:
+        feats (List[Tensor]): shape[s, (b, c, h, w)]
+        fpn_strides (tuple|list): shape[s], stride for each scale feature
+        grid_cell_size (float): anchor size
+        grid_cell_offset (float): The range is between 0 and 1.
+    Returns:
+        anchors (List[Tensor]): shape[s, (l, 4)]
+        num_anchors_list (List[int]): shape[s]
+        stride_tensor_list (List[Tensor]): shape[s, (l, 1)]
+    """
+    assert len(feats) == len(fpn_strides)
+    anchors = []
+    num_anchors_list = []
+    stride_tensor_list = []
+    for feat, stride in zip(feats, fpn_strides):
+        _, _, h, w = feat.shape
+        cell_half_size = grid_cell_size * stride * 0.5
+        shift_x = (paddle.arange(end=w) + grid_cell_offset) * stride
+        shift_y = (paddle.arange(end=h) + grid_cell_offset) * stride
+        shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
+        anchor = paddle.stack(
+            [
+                shift_x - cell_half_size, shift_y - cell_half_size,
+                shift_x + cell_half_size, shift_y + cell_half_size
+            ],
+            axis=-1).astype(feat.dtype)
+        anchors.append(anchor.reshape([-1, 4]))
+        num_anchors_list.append(len(anchors[-1]))
+        stride_tensor_list.append(
+            paddle.full([num_anchors_list[-1], 1], stride))
+    return anchors, num_anchors_list, stride_tensor_list

+ 10 - 5
paddlex/ppdet/modeling/backbones/lite_hrnet.py

@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is based on
+https://github.com/HRNet/Lite-HRNet/blob/hrnet/models/backbones/litehrnet.py
+"""
 
 import paddle
 import paddle.nn as nn
@@ -44,7 +48,7 @@ class ConvNormLayer(nn.Layer):
         self.act = act
         norm_lr = 0. if freeze_norm else 1.
         if norm_type is not None:
-            assert norm_type in ['bn', 'sync_bn', 'gn'],\
+            assert norm_type in ['bn', 'sync_bn', 'gn'], \
                 "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".format(norm_type)
             param_attr = ParamAttr(
                 initializer=Constant(1.0),
@@ -271,7 +275,7 @@ class ShuffleUnit(nn.Layer):
         branch_channel = out_channel // 2
         self.stride = stride
         if self.stride == 1:
-            assert in_channel == branch_channel * 2,\
+            assert in_channel == branch_channel * 2, \
                 "when stride=1, in_channel {} should equal to branch_channel*2 {}".format(in_channel, branch_channel * 2)
         if stride > 1:
             self.branch1 = nn.Sequential(
@@ -494,8 +498,9 @@ class LiteHRNetModule(nn.Layer):
         super(LiteHRNetModule, self).__init__()
         assert num_branches == len(in_channels),\
             "num_branches {} should equal to num_in_channels {}".format(num_branches, len(in_channels))
-        assert module_type in ['LITE', 'NAIVE'],\
-            "module_type should be one of ['LITE', 'NAIVE']"
+        assert module_type in [
+            'LITE', 'NAIVE'
+        ], "module_type should be one of ['LITE', 'NAIVE']"
         self.num_branches = num_branches
         self.in_channels = in_channels
         self.multiscale_output = multiscale_output
@@ -690,7 +695,7 @@ class LiteHRNet(nn.Layer):
         super(LiteHRNet, self).__init__()
         if isinstance(return_idx, Integral):
             return_idx = [return_idx]
-        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"],\
+        assert network_type in ["lite_18", "lite_30", "naive", "wider_naive"], \
             "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
         assert len(return_idx) > 0, "need one or more return index"
         self.freeze_at = freeze_at

+ 19 - 10
paddlex/ppdet/modeling/backbones/mobilenet_v1.py

@@ -226,7 +226,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv2_1"))
         self.dwsl.append(dws21)
-        self._update_out_channels(64, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(64 * scale), len(self.dwsl), feature_maps)
         dws22 = self.add_sublayer(
             "conv2_2",
             sublayer=DepthwiseSeparable(
@@ -242,7 +243,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv2_2"))
         self.dwsl.append(dws22)
-        self._update_out_channels(128, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(128 * scale), len(self.dwsl), feature_maps)
         # 1/4
         dws31 = self.add_sublayer(
             "conv3_1",
@@ -259,7 +261,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv3_1"))
         self.dwsl.append(dws31)
-        self._update_out_channels(128, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(128 * scale), len(self.dwsl), feature_maps)
         dws32 = self.add_sublayer(
             "conv3_2",
             sublayer=DepthwiseSeparable(
@@ -275,7 +278,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv3_2"))
         self.dwsl.append(dws32)
-        self._update_out_channels(256, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(256 * scale), len(self.dwsl), feature_maps)
         # 1/8
         dws41 = self.add_sublayer(
             "conv4_1",
@@ -292,7 +296,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv4_1"))
         self.dwsl.append(dws41)
-        self._update_out_channels(256, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(256 * scale), len(self.dwsl), feature_maps)
         dws42 = self.add_sublayer(
             "conv4_2",
             sublayer=DepthwiseSeparable(
@@ -308,13 +313,14 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv4_2"))
         self.dwsl.append(dws42)
-        self._update_out_channels(512, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(512 * scale), len(self.dwsl), feature_maps)
         # 1/16
         for i in range(5):
             tmp = self.add_sublayer(
                 "conv5_" + str(i + 1),
                 sublayer=DepthwiseSeparable(
-                    in_channels=512,
+                    in_channels=int(512 * scale),
                     out_channels1=512,
                     out_channels2=512,
                     num_groups=512,
@@ -326,7 +332,8 @@ class MobileNet(nn.Layer):
                     norm_type=norm_type,
                     name="conv5_" + str(i + 1)))
             self.dwsl.append(tmp)
-            self._update_out_channels(512, len(self.dwsl), feature_maps)
+            self._update_out_channels(
+                int(512 * scale), len(self.dwsl), feature_maps)
         dws56 = self.add_sublayer(
             "conv5_6",
             sublayer=DepthwiseSeparable(
@@ -342,7 +349,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv5_6"))
         self.dwsl.append(dws56)
-        self._update_out_channels(1024, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(1024 * scale), len(self.dwsl), feature_maps)
         # 1/32
         dws6 = self.add_sublayer(
             "conv6",
@@ -359,7 +367,8 @@ class MobileNet(nn.Layer):
                 norm_type=norm_type,
                 name="conv6"))
         self.dwsl.append(dws6)
-        self._update_out_channels(1024, len(self.dwsl), feature_maps)
+        self._update_out_channels(
+            int(1024 * scale), len(self.dwsl), feature_maps)
 
         if self.with_extra_blocks:
             self.extra_blocks = []

+ 24 - 2
paddlex/ppdet/modeling/bbox_utils.py

@@ -751,6 +751,28 @@ def bbox_center(boxes):
     Returns:
         Tensor: boxes centers with shape (N, 2), "cx, cy" format.
     """
-    boxes_cx = (boxes[:, 0] + boxes[:, 2]) / 2
-    boxes_cy = (boxes[:, 1] + boxes[:, 3]) / 2
+    boxes_cx = (boxes[..., 0] + boxes[..., 2]) / 2
+    boxes_cy = (boxes[..., 1] + boxes[..., 3]) / 2
     return paddle.stack([boxes_cx, boxes_cy], axis=-1)
+
+
+def batch_distance2bbox(points, distance, max_shapes=None):
+    """Decode distance prediction to bounding box for batch.
+    Args:
+        points (Tensor): [B, ..., 2]
+        distance (Tensor): [B, ..., 4]
+        max_shapes (tuple): [B, 2], "h,w" format, Shape of the image.
+    Returns:
+        Tensor: Decoded bboxes.
+    """
+    x1 = points[..., 0] - distance[..., 0]
+    y1 = points[..., 1] - distance[..., 1]
+    x2 = points[..., 0] + distance[..., 2]
+    y2 = points[..., 1] + distance[..., 3]
+    if max_shapes is not None:
+        for i, max_shape in enumerate(max_shapes):
+            x1[i] = x1[i].clip(min=0, max=max_shape[1])
+            y1[i] = y1[i].clip(min=0, max=max_shape[0])
+            x2[i] = x2[i].clip(min=0, max=max_shape[1])
+            y2[i] = y2[i].clip(min=0, max=max_shape[0])
+    return paddle.stack([x1, y1, x2, y2], -1)

+ 31 - 91
paddlex/ppdet/modeling/heads/tood_head.py

@@ -24,10 +24,11 @@ from paddle.nn.initializer import Constant
 
 from paddlex.ppdet.core.workspace import register
 from ..initializer import normal_, constant_, bias_init_with_prob
-from paddlex.ppdet.modeling.bbox_utils import bbox_center
+from paddlex.ppdet.modeling.bbox_utils import bbox_center, batch_distance2bbox
 from ..losses import GIoULoss
-from paddle.vision.ops import deform_conv2d
 from paddlex.ppdet.modeling.layers import ConvNormLayer
+from paddlex.ppdet.modeling.ops import get_static_shape
+from paddlex.ppdet.modeling.assigners.utils import generate_anchors_for_grid_cell
 
 
 class ScaleReg(nn.Layer):
@@ -84,25 +85,13 @@ class TaskDecomposition(nn.Layer):
         normal_(self.la_conv1.weight, std=0.001)
         normal_(self.la_conv2.weight, std=0.001)
 
-    def forward(self, feat, avg_feat=None):
-        b, _, h, w = feat.shape
-        if avg_feat is None:
-            avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))
+    def forward(self, feat, avg_feat):
+        b, _, h, w = get_static_shape(feat)
         weight = F.relu(self.la_conv1(avg_feat))
-        weight = F.sigmoid(self.la_conv2(weight))
-
-        # here new_conv_weight = layer_attention_weight * conv_weight
-        # in order to save memory and FLOPs.
-        conv_weight = weight.reshape([b, 1, self.stacked_convs, 1]) * \
-            self.reduction_conv.conv.weight.reshape(
-            [1, self.feat_channels, self.stacked_convs, self.feat_channels])
-        conv_weight = conv_weight.reshape(
-            [b, self.feat_channels, self.in_channels])
-        feat = feat.reshape([b, self.in_channels, h * w])
-        feat = paddle.bmm(conv_weight, feat).reshape(
-            [b, self.feat_channels, h, w])
-        if self.norm_type is not None:
-            feat = self.reduction_conv.norm(feat)
+        weight = F.sigmoid(self.la_conv2(weight)).unsqueeze(-1)
+        feat = paddle.reshape(
+            feat, [b, self.stacked_convs, self.feat_channels, h, w]) * weight
+        feat = self.reduction_conv(feat.flatten(1, 2))
         feat = F.relu(feat)
         return feat
 
@@ -211,81 +200,32 @@ class TOODHead(nn.Layer):
             normal_(self.cls_prob_conv2.weight, std=0.01)
             constant_(self.cls_prob_conv2.bias, bias_cls)
             normal_(self.reg_offset_conv1.weight, std=0.001)
-            normal_(self.reg_offset_conv2.weight, std=0.001)
+            constant_(self.reg_offset_conv2.weight)
             constant_(self.reg_offset_conv2.bias)
 
-    def _generate_anchors(self, feats):
-        anchors, num_anchors_list = [], []
-        stride_tensor_list = []
-        for feat, stride in zip(feats, self.fpn_strides):
-            _, _, h, w = feat.shape
-            cell_half_size = self.grid_cell_scale * stride * 0.5
-            shift_x = (paddle.arange(end=w) + self.grid_cell_offset) * stride
-            shift_y = (paddle.arange(end=h) + self.grid_cell_offset) * stride
-            shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
-            anchor = paddle.stack(
-                [
-                    shift_x - cell_half_size, shift_y - cell_half_size,
-                    shift_x + cell_half_size, shift_y + cell_half_size
-                ],
-                axis=-1)
-            anchors.append(anchor.reshape([-1, 4]))
-            num_anchors_list.append(len(anchors[-1]))
-            stride_tensor_list.append(
-                paddle.full([num_anchors_list[-1], 1], stride))
-        return anchors, num_anchors_list, stride_tensor_list
-
-    @staticmethod
-    def _batch_distance2bbox(points, distance, max_shapes=None):
-        """Decode distance prediction to bounding box.
-        Args:
-            points (Tensor): [B, l, 2]
-            distance (Tensor): [B, l, 4]
-            max_shapes (tuple): [B, 2], "h w" format, Shape of the image.
-        Returns:
-            Tensor: Decoded bboxes.
-        """
-        x1 = points[:, :, 0] - distance[:, :, 0]
-        y1 = points[:, :, 1] - distance[:, :, 1]
-        x2 = points[:, :, 0] + distance[:, :, 2]
-        y2 = points[:, :, 1] + distance[:, :, 3]
-        bboxes = paddle.stack([x1, y1, x2, y2], -1)
-        if max_shapes is not None:
-            out_bboxes = []
-            for bbox, max_shape in zip(bboxes, max_shapes):
-                bbox[:, 0] = bbox[:, 0].clip(min=0, max=max_shape[1])
-                bbox[:, 1] = bbox[:, 1].clip(min=0, max=max_shape[0])
-                bbox[:, 2] = bbox[:, 2].clip(min=0, max=max_shape[1])
-                bbox[:, 3] = bbox[:, 3].clip(min=0, max=max_shape[0])
-                out_bboxes.append(bbox)
-            out_bboxes = paddle.stack(out_bboxes)
-            return out_bboxes
-        return bboxes
-
-    @staticmethod
-    def _deform_sampling(feat, offset):
-        """ Sampling the feature according to offset.
-        Args:
-            feat (Tensor): Feature
-            offset (Tensor): Spatial offset for for feature sampliing
-        """
-        # it is an equivalent implementation of bilinear interpolation
-        # you can also use F.grid_sample instead
-        c = feat.shape[1]
-        weight = paddle.ones([c, 1, 1, 1])
-        y = deform_conv2d(feat, offset, weight, deformable_groups=c, groups=c)
-        return y
+    def _reg_grid_sample(self, feat, offset, anchor_points):
+        b, _, h, w = get_static_shape(feat)
+        feat = paddle.reshape(feat, [-1, 1, h, w])
+        offset = paddle.reshape(offset, [-1, 2, h, w]).transpose([0, 2, 3, 1])
+        grid_shape = paddle.concat([w, h]).astype('float32')
+        grid = (offset + anchor_points) / grid_shape
+        grid = 2 * grid.clip(0., 1.) - 1
+        feat = F.grid_sample(feat, grid)
+        feat = paddle.reshape(feat, [b, -1, h, w])
+        return feat
 
     def forward(self, feats):
         assert len(feats) == len(self.fpn_strides), \
             "The size of feats is not equal to size of fpn_strides"
 
-        anchors, num_anchors_list, stride_tensor_list = self._generate_anchors(
-            feats)
+        anchors, num_anchors_list, stride_tensor_list = generate_anchors_for_grid_cell(
+            feats, self.fpn_strides, self.grid_cell_scale,
+            self.grid_cell_offset)
+
         cls_score_list, bbox_pred_list = [], []
         for feat, scale_reg, anchor, stride in zip(feats, self.scales_regs,
                                                    anchors, self.fpn_strides):
-            b, _, h, w = feat.shape
+            b, _, h, w = get_static_shape(feat)
             inter_feats = []
             for inter_conv in self.inter_convs:
                 feat = F.relu(inter_conv(feat))
@@ -309,16 +249,16 @@ class TOODHead(nn.Layer):
 
             # reg prediction and alignment
             reg_dist = scale_reg(self.tood_reg(reg_feat).exp())
-            reg_dist = reg_dist.transpose([0, 2, 3, 1]).reshape([b, -1, 4])
+            reg_dist = reg_dist.flatten(2).transpose([0, 2, 1])
             anchor_centers = bbox_center(anchor).unsqueeze(0) / stride
-            reg_bbox = self._batch_distance2bbox(
-                anchor_centers.tile([b, 1, 1]), reg_dist)
+            reg_bbox = batch_distance2bbox(anchor_centers, reg_dist)
             if self.use_align_head:
-                reg_bbox = reg_bbox.reshape([b, h, w, 4]).transpose(
-                    [0, 3, 1, 2])
                 reg_offset = F.relu(self.reg_offset_conv1(feat))
                 reg_offset = self.reg_offset_conv2(reg_offset)
-                bbox_pred = self._deform_sampling(reg_bbox, reg_offset)
+                reg_bbox = reg_bbox.transpose([0, 2, 1]).reshape([b, 4, h, w])
+                anchor_centers = anchor_centers.reshape([1, h, w, 2])
+                bbox_pred = self._reg_grid_sample(reg_bbox, reg_offset,
+                                                  anchor_centers)
                 bbox_pred = bbox_pred.flatten(2).transpose([0, 2, 1])
             else:
                 bbox_pred = reg_bbox

+ 7 - 1
paddlex/ppdet/modeling/keypoint_utils.py

@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+this code is based on https://github.com/open-mmlab/mmpose
+"""
 
 import cv2
 import numpy as np
@@ -96,7 +99,10 @@ def get_affine_transform(center,
 
 
 def get_warp_matrix(theta, size_input, size_dst, size_target):
-    """Calculate the transformation matrix under the constraint of unbiased.
+    """This code is based on
+        https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
+
+        Calculate the transformation matrix under the constraint of unbiased.
     Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
     Data Processing for Human Pose Estimation (CVPR 2020).
 

+ 0 - 2
paddlex/ppdet/modeling/losses/__init__.py

@@ -25,7 +25,6 @@ from . import fairmot_loss
 from . import gfocal_loss
 from . import detr_loss
 from . import sparsercnn_loss
-from . import varifocal_loss
 
 from .yolo_loss import *
 from .iou_aware_loss import *
@@ -40,4 +39,3 @@ from .fairmot_loss import *
 from .gfocal_loss import *
 from .detr_loss import *
 from .sparsercnn_loss import *
-from .varifocal_loss import *

+ 19 - 13
paddlex/ppdet/modeling/ops.py

@@ -125,7 +125,7 @@ def roi_pool(input,
     ..  code-block:: python
 
         import paddle
-        from ppdet.modeling import ops
+        from paddlex.ppdet.modeling import ops
         paddle.enable_static()
 
         x = paddle.static.data(
@@ -232,7 +232,7 @@ def roi_align(input,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
 
             x = paddle.static.data(
@@ -329,7 +329,7 @@ def iou_similarity(x, y, box_normalized=True, name=None):
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
 
             x = paddle.static.data(name='x', shape=[None, 4], dtype='float32')
@@ -407,7 +407,7 @@ def collect_fpn_proposals(multi_rois,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
             multi_rois = []
             multi_scores = []
@@ -529,7 +529,7 @@ def distribute_fpn_proposals(fpn_rois,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
             fpn_rois = paddle.static.data(
                 name='data', shape=[None, 4], dtype='float32', lod_level=1)
@@ -677,7 +677,7 @@ def yolo_box(
     .. code-block:: python
 
         import paddle
-        from ppdet.modeling import ops
+        from paddlex.ppdet.modeling import ops
 
         paddle.enable_static()
         x = paddle.static.data(name='x', shape=[None, 255, 13, 13], dtype='float32')
@@ -793,7 +793,7 @@ def prior_box(input,
         .. code-block:: python
 
         import paddle
-        from ppdet.modeling import ops
+        from paddlex.ppdet.modeling import ops
 
         paddle.enable_static()
         input = paddle.static.data(name="input", shape=[None,3,6,9])
@@ -959,7 +959,7 @@ def multiclass_nms(bboxes,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             boxes = paddle.static.data(name='bboxes', shape=[81, 4],
                                       dtype='float32', lod_level=1)
             scores = paddle.static.data(name='scores', shape=[81],
@@ -1095,7 +1095,7 @@ def matrix_nms(bboxes,
     Examples:
         .. code-block:: python
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             boxes = paddle.static.data(name='bboxes', shape=[None,81, 4],
                                       dtype='float32', lod_level=1)
             scores = paddle.static.data(name='scores', shape=[None,81],
@@ -1230,8 +1230,8 @@ def bipartite_match(dist_matrix,
 
         .. code-block:: python
             import paddle
-            from ppdet.modeling import ops
-            from ppdet.modeling.utils import iou_similarity
+            from paddlex.ppdet.modeling import ops
+            from paddlex.ppdet.modeling.utils import iou_similarity
 
             paddle.enable_static()
 
@@ -1347,7 +1347,7 @@ def box_coder(prior_box,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
             # For encode
             prior_box_encode = paddle.static.data(name='prior_box_encode',
@@ -1494,7 +1494,7 @@ def generate_proposals(scores,
         .. code-block:: python
 
             import paddle
-            from ppdet.modeling import ops
+            from paddlex.ppdet.modeling import ops
             paddle.enable_static()
             scores = paddle.static.data(name='scores', shape=[None, 4, 5, 5], dtype='float32')
             bbox_deltas = paddle.static.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32')
@@ -1603,3 +1603,9 @@ def channel_shuffle(x, groups):
     x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
     x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
     return x
+
+
+def get_static_shape(tensor):
+    shape = paddle.shape(tensor)  # NOTE(review): presumably a shape *tensor*, not a Python list - confirm
+    shape.stop_gradient = True  # flag the shape value so no gradient is tracked for it
+    return shape

+ 56 - 0
paddlex/ppdet/modeling/post_process.py

@@ -658,3 +658,59 @@ class SparsePostProcess(object):
 
         bbox_pred = paddle.concat(boxes_final)
         return bbox_pred, bbox_num
+
+
+def nms(dets, thresh):
+    """Apply classic DPM-style greedy NMS.
+
+    Args:
+        dets (np.ndarray): 2-D array, rows read as (score, x1, y1, x2, y2).
+        thresh (float): IoU at or above which a lower-scoring box is dropped.
+
+    Returns:
+        np.ndarray: the surviving rows of ``dets``, in input row order.
+    """
+    if dets.shape[0] == 0:
+        return dets[[], :]
+    scores = dets[:, 0]
+    x1 = dets[:, 1]
+    y1 = dets[:, 2]
+    x2 = dets[:, 3]
+    y2 = dets[:, 4]
+
+    # widths/heights use the inclusive-pixel convention, hence the "+ 1"
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    ndets = dets.shape[0]
+    # builtin int: the np.int alias is gone from NumPy >= 1.24
+    suppressed = np.zeros(ndets, dtype=int)
+
+    # Visit boxes from highest to lowest score (i); each box still alive
+    # suppresses every lower-scoring box (j) whose IoU with it is too large.
+    for _i in range(ndets):
+        i = order[_i]
+        if suppressed[i] == 1:
+            continue
+        ix1 = x1[i]
+        iy1 = y1[i]
+        ix2 = x2[i]
+        iy2 = y2[i]
+        iarea = areas[i]
+        for _j in range(_i + 1, ndets):
+            j = order[_j]
+            if suppressed[j] == 1:
+                continue
+            xx1 = max(ix1, x1[j])
+            yy1 = max(iy1, y1[j])
+            xx2 = min(ix2, x2[j])
+            yy2 = min(iy2, y2[j])
+            w = max(0.0, xx2 - xx1 + 1)
+            h = max(0.0, yy2 - yy1 + 1)
+            inter = w * h
+            ovr = inter / (iarea + areas[j] - inter)
+            if ovr >= thresh:
+                suppressed[j] = 1
+    keep = np.where(suppressed == 0)[0]
+    dets = dets[keep, :]
+    return dets

BIN
paddlex/ppdet/modeling/tests/imgs/coco2017_val2017_000000000139.jpg


BIN
paddlex/ppdet/modeling/tests/imgs/coco2017_val2017_000000000724.jpg


+ 62 - 0
paddlex/ppdet/modeling/tests/test_mstest.py

@@ -0,0 +1,62 @@
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import unittest
+from paddlex.ppdet.core.workspace import load_config
+from paddlex.ppdet.engine import Trainer
+
+
+class TestMultiScaleInference(unittest.TestCase):
+    """Smoke tests for evaluation and prediction driven by the multi-scale
+    test config of Faster R-CNN."""
+    def setUp(self):
+        self.set_config()
+
+    def set_config(self):
+        self.mstest_cfg_file = 'configs/faster_rcnn/faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml'
+
+    def _make_trainer(self, mode):
+        # Build a Trainer in `mode` from the config file, then load the
+        # checkpoint at the hard-coded URL (also stored on cfg.weights).
+        cfg = load_config(self.mstest_cfg_file)
+        trainer = Trainer(cfg, mode=mode)
+        cfg.weights = 'https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams'
+        trainer.load_weights(cfg.weights)
+        return trainer
+
+    # test evaluation with multi scale test
+    def test_eval_mstest(self):
+        self._make_trainer('eval').evaluate()
+
+    # test inference with multi scale test
+    def test_infer_mstest(self):
+        # the input images are looked up in imgs/ next to this test file
+        img_dir = os.path.join(os.path.dirname(__file__), 'imgs')
+        img_paths = [
+            os.path.join(img_dir, fname)
+            for fname in ('coco2017_val2017_000000000139.jpg',
+                          'coco2017_val2017_000000000724.jpg')
+        ]
+        trainer = self._make_trainer('test')
+        trainer.predict(
+            img_paths, draw_threshold=0.5, output_dir='output', save_txt=True)
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 4 - 2
paddlex/ppdet/optimizer.py

@@ -56,11 +56,13 @@ class CosineDecay(object):
         max_iters = self.max_epochs * int(step_per_epoch)
 
         if boundary is not None and value is not None and self.use_warmup:
+            warmup_iters = len(boundary)
             for i in range(int(boundary[-1]), max_iters):
                 boundary.append(i)
 
-                decayed_lr = base_lr * 0.5 * (
-                    math.cos(i * math.pi / max_iters) + 1)
+                decayed_lr = base_lr * 0.5 * (math.cos(
+                    (i - warmup_iters) * math.pi /
+                    (max_iters - warmup_iters)) + 1)
                 value.append(decayed_lr)
             return optimizer.lr.PiecewiseDecay(boundary, value)