瀏覽代碼

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleX into develop_qh

FlyingQianMM 4 年之前
父節點
當前提交
a4cac75a0d
共有 50 個文件被更改,包括 385 次插入和 3303 次删除
  1. 1 1
      PaddleDetection
  2. 1 1
      paddlex/cv/datasets/coco.py
  3. 4 2
      paddlex/cv/datasets/voc.py
  4. 5 1
      paddlex/cv/models/base.py
  5. 157 1
      paddlex/cv/models/detector.py
  6. 5 2
      paddlex/ppdet/data/source/category.py
  7. 5 6
      paddlex/ppdet/data/source/coco.py
  8. 17 14
      paddlex/ppdet/data/source/voc.py
  9. 0 267
      paddlex/ppdet/data/transform/atss_assigner.py
  10. 4 132
      paddlex/ppdet/data/transform/batch_operators.py
  11. 4 6
      paddlex/ppdet/engine/export_utils.py
  12. 2 2
      paddlex/ppdet/engine/tracker.py
  13. 11 14
      paddlex/ppdet/engine/trainer.py
  14. 2 39
      paddlex/ppdet/metrics/keypoint_metrics.py
  15. 0 192
      paddlex/ppdet/metrics/mot_eval_utils.py
  16. 2 2
      paddlex/ppdet/metrics/mot_metrics.py
  17. 0 4
      paddlex/ppdet/modeling/architectures/__init__.py
  18. 0 87
      paddlex/ppdet/modeling/architectures/gfl.py
  19. 4 5
      paddlex/ppdet/modeling/architectures/keypoint_hrnet.py
  20. 0 91
      paddlex/ppdet/modeling/architectures/picodet.py
  21. 0 4
      paddlex/ppdet/modeling/backbones/__init__.py
  22. 14 3
      paddlex/ppdet/modeling/backbones/blazenet.py
  23. 10 4
      paddlex/ppdet/modeling/backbones/ghostnet.py
  24. 19 7
      paddlex/ppdet/modeling/backbones/hrnet.py
  25. 0 886
      paddlex/ppdet/modeling/backbones/lite_hrnet.py
  26. 20 6
      paddlex/ppdet/modeling/backbones/mobilenet_v3.py
  27. 0 262
      paddlex/ppdet/modeling/backbones/shufflenet_v2.py
  28. 7 2
      paddlex/ppdet/modeling/backbones/vgg.py
  29. 1 45
      paddlex/ppdet/modeling/bbox_utils.py
  30. 0 4
      paddlex/ppdet/modeling/heads/__init__.py
  31. 2 1
      paddlex/ppdet/modeling/heads/centernet_head.py
  32. 5 3
      paddlex/ppdet/modeling/heads/detr_head.py
  33. 19 8
      paddlex/ppdet/modeling/heads/fcos_head.py
  34. 0 476
      paddlex/ppdet/modeling/heads/gfl_head.py
  35. 0 329
      paddlex/ppdet/modeling/heads/pico_head.py
  36. 0 2
      paddlex/ppdet/modeling/losses/__init__.py
  37. 0 214
      paddlex/ppdet/modeling/losses/gfocal_loss.py
  38. 4 5
      paddlex/ppdet/modeling/losses/keypoint_loss.py
  39. 0 2
      paddlex/ppdet/modeling/necks/__init__.py
  40. 14 3
      paddlex/ppdet/modeling/necks/blazeface_fpn.py
  41. 4 0
      paddlex/ppdet/modeling/necks/hrfpn.py
  42. 0 135
      paddlex/ppdet/modeling/necks/pan.py
  43. 0 12
      paddlex/ppdet/modeling/ops.py
  44. 3 1
      paddlex/ppdet/modeling/reid/jde_embedding_head.py
  45. 7 3
      paddlex/ppdet/modeling/reid/pyramidal_embedding.py
  46. 12 2
      paddlex/ppdet/modeling/reid/resnet.py
  47. 0 10
      paddlex/ppdet/modeling/tests/test_architectures.py
  48. 2 5
      paddlex/ppdet/utils/download.py
  49. 9 0
      paddlex_restful/restful/app.py
  50. 9 0
      static/paddlex_restful/restful/app.py

+ 1 - 1
PaddleDetection

@@ -1 +1 @@
-Subproject commit 3bdf2671f3188de3c4158c9056a46e949cf02eb8
+Subproject commit 5b949596ea7603cd79e3fc9067766bbc79a3e93d

+ 1 - 1
paddlex/cv/datasets/coco.py

@@ -196,7 +196,7 @@ class CocoDetection(VOCDetection):
             logging.error(
                 "No coco record found in %s' % (ann_file)", exit=True)
         self.pos_num = len(self.file_list)
-        if self.allow_empty:
+        if self.allow_empty and neg_file_list:
             self.file_list += self._sample_empty(neg_file_list)
         logging.info(
             "{} samples in file {}, including {} positive samples and {} negative samples.".

+ 4 - 2
paddlex/cv/datasets/voc.py

@@ -290,7 +290,7 @@ class VOCDetection(Dataset):
             logging.error(
                 "No voc record found in %s' % (file_list)", exit=True)
         self.pos_num = len(self.file_list)
-        if self.allow_empty:
+        if self.allow_empty and neg_file_list:
             self.file_list += self._sample_empty(neg_file_list)
         logging.info(
             "{} samples in file {}, including {} positive samples and {} negative samples.".
@@ -423,7 +423,9 @@ class VOCDetection(Dataset):
                 **
                 label_info
             })
-        self.file_list += self._sample_empty(neg_file_list)
+        if neg_file_list:
+            self.allow_empty = True
+            self.file_list += self._sample_empty(neg_file_list)
         logging.info(
             "{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
             format(

+ 5 - 1
paddlex/cv/models/base.py

@@ -271,7 +271,11 @@ class BaseModel:
             transforms=train_dataset.transforms,
             mode='train')
 
-        nranks = paddle.distributed.get_world_size()
+        if "RCNN" in self.__class__.__name__ and train_dataset.pos_num < len(
+                train_dataset.file_list):
+            nranks = 1
+        else:
+            nranks = paddle.distributed.get_world_size()
         local_rank = paddle.distributed.get_rank()
         if nranks > 1:
             find_unused_parameters = getattr(self, 'find_unused_parameters',

+ 157 - 1
paddlex/cv/models/detector.py

@@ -18,7 +18,6 @@ import collections
 import copy
 import os
 import os.path as osp
-import six
 import numpy as np
 import paddle
 from paddle.static import InputSpec
@@ -29,6 +28,7 @@ import paddlex.utils.logging as logging
 from paddlex.cv.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
 from paddlex.cv.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, _BatchPadding, _Gt2YoloTarget
 from paddlex.cv.transforms import arrange_transforms
+from paddlex.utils import get_single_card_bs
 from .base import BaseModel
 from .utils.det_metrics import VOCMetric, COCOMetric
 from .utils.ema import ExponentialMovingAverage
@@ -975,6 +975,84 @@ class FasterRCNN(BaseDetector):
         super(FasterRCNN, self).__init__(
             model_name='FasterRCNN', num_classes=num_classes, **params)
 
+    def train(self,
+              num_epochs,
+              train_dataset,
+              train_batch_size=64,
+              eval_dataset=None,
+              optimizer=None,
+              save_interval_epochs=1,
+              log_interval_steps=10,
+              save_dir='output',
+              pretrain_weights='IMAGENET',
+              learning_rate=.001,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
+              lr_decay_epochs=(216, 243),
+              lr_decay_gamma=0.1,
+              metric=None,
+              use_ema=False,
+              early_stop=False,
+              early_stop_patience=5,
+              use_vdl=True,
+              resume_checkpoint=None):
+        """
+        Train the model.
+        Args:
+            num_epochs(int): The number of epochs.
+            train_dataset(paddlex.dataset): Training dataset.
+            train_batch_size(int, optional): Total batch size among all cards used in training. Defaults to 64.
+            eval_dataset(paddlex.dataset, optional):
+                Evaluation dataset. If None, the model will not be evaluated during training process. Defaults to None.
+            optimizer(paddle.optimizer.Optimizer or None, optional):
+                Optimizer used for training. If None, a default optimizer is used. Defaults to None.
+            save_interval_epochs(int, optional): Epoch interval for saving the model. Defaults to 1.
+            log_interval_steps(int, optional): Step interval for printing training information. Defaults to 10.
+            save_dir(str, optional): Directory to save the model. Defaults to 'output'.
+            pretrain_weights(str or None, optional):
+                None or name/path of pretrained weights. If None, no pretrained weights will be loaded. Defaults to 'IMAGENET'.
+            learning_rate(float, optional): Learning rate for training. Defaults to .001.
+            warmup_steps(int, optional): The number of steps of warm-up training. Defaults to 0.
+            warmup_start_lr(float, optional): Start learning rate of warm-up training. Defaults to 0..
+            lr_decay_epochs(list or tuple, optional): Epoch milestones for learning rate decay. Defaults to (216, 243).
+            lr_decay_gamma(float, optional): Gamma coefficient of learning rate decay. Defaults to .1.
+            metric({'VOC', 'COCO', None}, optional):
+                Evaluation metric. If None, determine the metric according to the dataset format. Defaults to None.
+            use_ema(bool, optional): Whether to use exponential moving average strategy. Defaults to False.
+            early_stop(bool, optional): Whether to adopt early stop strategy. Defaults to False.
+            early_stop_patience(int, optional): Early stop patience. Defaults to 5.
+            use_vdl(bool, optional): Whether to use VisualDL to monitor the training process. Defaults to True.
+            resume_checkpoint(str or None, optional): The path of the checkpoint to resume training from.
+                If None, no training checkpoint will be resumed. At most one of `resume_checkpoint` and
+                `pretrain_weights` can be set simultaneously. Defaults to None.
+        """
+        if train_dataset.pos_num < len(train_dataset.file_list):
+            train_dataset.num_workers = 0
+            if train_batch_size != 1:
+                train_batch_size = 1
+                logging.warning(
+                    "Training RCNN models with negative samples only support batch size equals to 1 "
+                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
+                )
+            nranks = paddle.distributed.get_world_size()
+            local_rank = paddle.distributed.get_rank()
+            # single card training
+            if nranks < 2 or local_rank == 0:
+                super(FasterRCNN, self).train(
+                    num_epochs, train_dataset, train_batch_size, eval_dataset,
+                    optimizer, save_interval_epochs, log_interval_steps,
+                    save_dir, pretrain_weights, learning_rate, warmup_steps,
+                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
+                    use_ema, early_stop, early_stop_patience, use_vdl,
+                    resume_checkpoint)
+        else:
+            super(FasterRCNN, self).train(
+                num_epochs, train_dataset, train_batch_size, eval_dataset,
+                optimizer, save_interval_epochs, log_interval_steps, save_dir,
+                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+                early_stop_patience, use_vdl, resume_checkpoint)
+
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
@@ -1755,6 +1833,84 @@ class MaskRCNN(BaseDetector):
         super(MaskRCNN, self).__init__(
             model_name='MaskRCNN', num_classes=num_classes, **params)
 
+    def train(self,
+              num_epochs,
+              train_dataset,
+              train_batch_size=64,
+              eval_dataset=None,
+              optimizer=None,
+              save_interval_epochs=1,
+              log_interval_steps=10,
+              save_dir='output',
+              pretrain_weights='IMAGENET',
+              learning_rate=.001,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
+              lr_decay_epochs=(216, 243),
+              lr_decay_gamma=0.1,
+              metric=None,
+              use_ema=False,
+              early_stop=False,
+              early_stop_patience=5,
+              use_vdl=True,
+              resume_checkpoint=None):
+        """
+        Train the model.
+        Args:
+            num_epochs(int): The number of epochs.
+            train_dataset(paddlex.dataset): Training dataset.
+            train_batch_size(int, optional): Total batch size among all cards used in training. Defaults to 64.
+            eval_dataset(paddlex.dataset, optional):
+                Evaluation dataset. If None, the model will not be evaluated during training process. Defaults to None.
+            optimizer(paddle.optimizer.Optimizer or None, optional):
+                Optimizer used for training. If None, a default optimizer is used. Defaults to None.
+            save_interval_epochs(int, optional): Epoch interval for saving the model. Defaults to 1.
+            log_interval_steps(int, optional): Step interval for printing training information. Defaults to 10.
+            save_dir(str, optional): Directory to save the model. Defaults to 'output'.
+            pretrain_weights(str or None, optional):
+                None or name/path of pretrained weights. If None, no pretrained weights will be loaded. Defaults to 'IMAGENET'.
+            learning_rate(float, optional): Learning rate for training. Defaults to .001.
+            warmup_steps(int, optional): The number of steps of warm-up training. Defaults to 0.
+            warmup_start_lr(float, optional): Start learning rate of warm-up training. Defaults to 0..
+            lr_decay_epochs(list or tuple, optional): Epoch milestones for learning rate decay. Defaults to (216, 243).
+            lr_decay_gamma(float, optional): Gamma coefficient of learning rate decay. Defaults to .1.
+            metric({'VOC', 'COCO', None}, optional):
+                Evaluation metric. If None, determine the metric according to the dataset format. Defaults to None.
+            use_ema(bool, optional): Whether to use exponential moving average strategy. Defaults to False.
+            early_stop(bool, optional): Whether to adopt early stop strategy. Defaults to False.
+            early_stop_patience(int, optional): Early stop patience. Defaults to 5.
+            use_vdl(bool, optional): Whether to use VisualDL to monitor the training process. Defaults to True.
+            resume_checkpoint(str or None, optional): The path of the checkpoint to resume training from.
+                If None, no training checkpoint will be resumed. At most one of `resume_checkpoint` and
+                `pretrain_weights` can be set simultaneously. Defaults to None.
+        """
+        if train_dataset.pos_num < len(train_dataset.file_list):
+            train_dataset.num_workers = 0
+            if train_batch_size != 1:
+                train_batch_size = 1
+                logging.warning(
+                    "Training RCNN models with negative samples only support batch size equals to 1 "
+                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
+                )
+            nranks = paddle.distributed.get_world_size()
+            local_rank = paddle.distributed.get_rank()
+            # single card training
+            if nranks < 2 or local_rank == 0:
+                super(MaskRCNN, self).train(
+                    num_epochs, train_dataset, train_batch_size, eval_dataset,
+                    optimizer, save_interval_epochs, log_interval_steps,
+                    save_dir, pretrain_weights, learning_rate, warmup_steps,
+                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
+                    use_ema, early_stop, early_stop_patience, use_vdl,
+                    resume_checkpoint)
+        else:
+            super(MaskRCNN, self).train(
+                num_epochs, train_dataset, train_batch_size, eval_dataset,
+                optimizer, save_interval_epochs, log_interval_steps, save_dir,
+                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+                early_stop_patience, use_vdl, resume_checkpoint)
+
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [

+ 5 - 2
paddlex/ppdet/data/source/category.py

@@ -90,16 +90,19 @@ def get_categories(metric_type, anno_file=None, arch=None):
     elif metric_type.lower() in ['mot', 'motdet', 'reid']:
         return _mot_category()
 
+    elif metric_type.lower() in ['kitti', 'bdd100k']:
+        return _mot_category(category='car')
+
     else:
         raise ValueError("unknown metric type {}".format(metric_type))
 
 
-def _mot_category():
+def _mot_category(category='person'):
     """
     Get class id to category id map and category id
     to category name map of mot dataset
     """
-    label_map = {'person': 0}
+    label_map = {category: 0}
     label_map = sorted(label_map.items(), key=lambda x: x[1])
     cats = [l[0] for l in label_map]
 

+ 5 - 6
paddlex/ppdet/data/source/coco.py

@@ -181,7 +181,7 @@ class COCODataSet(DetDataset):
                 gt_poly = [None] * num_bbox
 
                 has_segmentation = False
-                for i, box in enumerate(bboxes):
+                for i, box in reversed(list(enumerate(bboxes))):
                     catid = box['category_id']
                     gt_class[i][0] = self.catid2clsid[catid]
                     gt_bbox[i, :] = box['clean_bbox']
@@ -195,11 +195,10 @@ class COCODataSet(DetDataset):
                     elif 'segmentation' in box and box['segmentation']:
                         if not np.array(box['segmentation']
                                         ).size > 0 and not self.allow_empty:
-                            bboxes.pop(i)
                             gt_poly.pop(i)
-                            np.delete(is_crowd, i)
-                            np.delete(gt_class, i)
-                            np.delete(gt_bbox, i)
+                            is_crowd = np.delete(is_crowd, i)
+                            gt_class = np.delete(gt_class, i)
+                            gt_bbox = np.delete(gt_bbox, i)
                         else:
                             gt_poly[i] = box['segmentation']
                         has_segmentation = True
@@ -245,7 +244,7 @@ class COCODataSet(DetDataset):
                 break
         assert ct > 0, 'not found any coco record in %s' % (anno_path)
         logger.debug('{} samples in file {}'.format(ct, anno_path))
-        if len(empty_records) > 0:
+        if self.allow_empty and len(empty_records) > 0:
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
         self.roidbs = records

+ 17 - 14
paddlex/ppdet/data/source/voc.py

@@ -131,11 +131,13 @@ class VOCDataSet(DetDataset):
                         'Illegal width: {} or height: {} in annotation, '
                         'and {} will be ignored'.format(im_w, im_h, xml_file))
                     continue
-                gt_bbox = []
-                gt_class = []
-                gt_score = []
-                difficult = []
-                for i, obj in enumerate(objs):
+
+                num_bbox, i = len(objs), 0
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
+                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
+                for obj in objs:
                     cname = obj.find('name').text
 
                     # user dataset may not contain difficult field
@@ -152,19 +154,20 @@ class VOCDataSet(DetDataset):
                     x2 = min(im_w - 1, x2)
                     y2 = min(im_h - 1, y2)
                     if x2 > x1 and y2 > y1:
-                        gt_bbox.append([x1, y1, x2, y2])
-                        gt_class.append([cname2cid[cname]])
-                        gt_score.append([1.])
-                        difficult.append([_difficult])
+                        gt_bbox[i, :] = [x1, y1, x2, y2]
+                        gt_class[i, 0] = cname2cid[cname]
+                        gt_score[i, 0] = 1.
+                        difficult[i, 0] = _difficult
+                        i += 1
                     else:
                         logger.warning(
                             'Found an invalid bbox in annotations: xml_file: {}'
                             ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                 xml_file, x1, y1, x2, y2))
-                gt_bbox = np.array(gt_bbox).astype('float32')
-                gt_class = np.array(gt_class).astype('int32')
-                gt_score = np.array(gt_score).astype('float32')
-                difficult = np.array(difficult).astype('int32')
+                gt_bbox = gt_bbox[:i, :]
+                gt_class = gt_class[:i, :]
+                gt_score = gt_score[:i, :]
+                difficult = difficult[:i, :]
 
                 voc_rec = {
                     'im_file': img_file,
@@ -193,7 +196,7 @@ class VOCDataSet(DetDataset):
                     break
         assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
         logger.debug('{} samples in file {}'.format(ct, anno_path))
-        if len(empty_records) > 0:
+        if self.allow_empty and len(empty_records) > 0:
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
         self.roidbs, self.cname2cid = records, cname2cid

+ 0 - 267
paddlex/ppdet/data/transform/atss_assigner.py

@@ -1,267 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from paddlex.ppdet.utils.logger import setup_logger
-logger = setup_logger(__name__)
-
-
-def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
-    """Calculate overlap between two set of bboxes.
-    If ``is_aligned `` is ``False``, then calculate the overlaps between each
-    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
-    pair of bboxes1 and bboxes2.
-    Args:
-        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
-        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
-            B indicates the batch dim, in shape (B1, B2, ..., Bn).
-            If ``is_aligned `` is ``True``, then m and n must be equal.
-        mode (str): "iou" (intersection over union) or "iof" (intersection over
-            foreground).
-        is_aligned (bool, optional): If True, then m and n must be equal.
-            Default False.
-        eps (float, optional): A value added to the denominator for numerical
-            stability. Default 1e-6.
-    Returns:
-        Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
-    """
-    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
-    # Either the boxes are empty or the length of the boxes' last dimension is 4
-    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
-    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
-
-    # Batch dim must be the same
-    # Batch dim: (B1, B2, ... Bn)
-    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
-    batch_shape = bboxes1.shape[:-2]
-
-    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
-    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
-    if is_aligned:
-        assert rows == cols
-
-    if rows * cols == 0:
-        if is_aligned:
-            return np.random.random(batch_shape + (rows, ))
-        else:
-            return np.random.random(batch_shape + (rows, cols))
-
-    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
-        bboxes1[..., 3] - bboxes1[..., 1])
-    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
-        bboxes2[..., 3] - bboxes2[..., 1])
-
-    if is_aligned:
-        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
-        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]
-
-        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
-        overlap = wh[..., 0] * wh[..., 1]
-
-        if mode in ['iou', 'giou']:
-            union = area1 + area2 - overlap
-        else:
-            union = area1
-        if mode == 'giou':
-            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
-            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
-    else:
-        lt = np.maximum(bboxes1[..., :, None, :2],
-                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
-        rb = np.minimum(bboxes1[..., :, None, 2:],
-                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]
-
-        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
-        overlap = wh[..., 0] * wh[..., 1]
-
-        if mode in ['iou', 'giou']:
-            union = area1[..., None] + area2[..., None, :] - overlap
-        else:
-            union = area1[..., None]
-        if mode == 'giou':
-            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
-                                     bboxes2[..., None, :, :2])
-            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
-                                     bboxes2[..., None, :, 2:])
-
-    eps = np.array([eps])
-    union = np.maximum(union, eps)
-    ious = overlap / union
-    if mode in ['iou', 'iof']:
-        return ious
-    # calculate gious
-    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
-    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
-    enclose_area = np.maximum(enclose_area, eps)
-    gious = ious - (enclose_area - union) / enclose_area
-    return gious
-
-
-def topk_(input, k, axis=1, largest=True):
-    x = -input if largest else input
-    if axis == 0:
-        row_index = np.arange(input.shape[1 - axis])
-        topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
-        topk_data = x[topk_index, row_index]
-
-        topk_index_sort = np.argsort(topk_data, axis=axis)
-        topk_data_sort = topk_data[topk_index_sort, row_index]
-        topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
-    else:
-        column_index = np.arange(x.shape[1 - axis])[:, None]
-        topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
-        topk_data = x[column_index, topk_index]
-        topk_data = -topk_data if largest else topk_data
-        topk_index_sort = np.argsort(topk_data, axis=axis)
-        topk_data_sort = topk_data[column_index, topk_index_sort]
-        topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
-
-    return topk_data_sort, topk_index_sort
-
-
-class ATSSAssigner(object):
-    """Assign a corresponding gt bbox or background to each bbox.
-
-    Each proposals will be assigned with `0` or a positive integer
-    indicating the ground truth index.
-
-    - 0: negative sample, no assigned gt
-    - positive integer: positive sample, index (1-based) of assigned gt
-
-    Args:
-        topk (float): number of bbox selected in each level
-    """
-
-    def __init__(self, topk=9):
-        self.topk = topk
-
-    def __call__(self,
-                 bboxes,
-                 num_level_bboxes,
-                 gt_bboxes,
-                 gt_bboxes_ignore=None,
-                 gt_labels=None):
-        """Assign gt to bboxes.
-        The assignment is done in following steps
-        1. compute iou between all bbox (bbox of all pyramid levels) and gt
-        2. compute center distance between all bbox and gt
-        3. on each pyramid level, for each gt, select k bbox whose center
-           are closest to the gt center, so we total select k*l bbox as
-           candidates for each gt
-        4. get corresponding iou for the these candidates, and compute the
-           mean and std, set mean + std as the iou threshold
-        5. select these candidates whose iou are greater than or equal to
-           the threshold as positive
-        6. limit the positive sample's center in gt
-        Args:
-            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
-            num_level_bboxes (List): num of bboxes in each level
-            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
-            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
-                labelled as `ignored`, e.g., crowd boxes in COCO.
-            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
-        """
-        bboxes = bboxes[:, :4]
-        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
-        # compute iou between all bbox and gt
-        overlaps = bbox_overlaps(bboxes, gt_bboxes)
-
-        # assign 0 by default
-        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
-
-        if num_gt == 0 or num_bboxes == 0:
-            # No ground truth or boxes, return empty assignment
-            max_overlaps = np.zeros((num_bboxes, ))
-            if num_gt == 0:
-                # No truth, assign everything to background
-                assigned_gt_inds[:] = 0
-            if not np.any(gt_labels):
-                assigned_labels = None
-            else:
-                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
-            return assigned_gt_inds, max_overlaps, assigned_labels
-
-        # compute center distance between all bbox and gt
-        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
-        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
-        gt_points = np.stack((gt_cx, gt_cy), axis=1)
-
-        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
-        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
-        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
-
-        distances = np.sqrt(
-            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
-            .sum(-1))
-
-        # Selecting candidates based on the center distance
-        candidate_idxs = []
-        start_idx = 0
-        for bboxes_per_level in num_level_bboxes:
-            # on each pyramid level, for each gt,
-            # select k bbox whose center are closest to the gt center
-            end_idx = start_idx + bboxes_per_level
-            distances_per_level = distances[start_idx:end_idx, :]
-            selectable_k = min(self.topk, bboxes_per_level)
-            _, topk_idxs_per_level = topk_(
-                distances_per_level, selectable_k, axis=0, largest=False)
-            candidate_idxs.append(topk_idxs_per_level + start_idx)
-            start_idx = end_idx
-        candidate_idxs = np.concatenate(candidate_idxs, axis=0)
-
-        # get corresponding iou for the these candidates, and compute the
-        # mean and std, set mean + std as the iou threshold
-        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
-        overlaps_mean_per_gt = candidate_overlaps.mean(0)
-        overlaps_std_per_gt = candidate_overlaps.std(0)
-        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
-
-        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
-
-        # limit the positive sample's center in gt
-        for gt_idx in range(num_gt):
-            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
-        ep_bboxes_cx = np.broadcast_to(
-            bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
-        ep_bboxes_cy = np.broadcast_to(
-            bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
-        candidate_idxs = candidate_idxs.reshape(-1)
-
-        # calculate the left, top, right, bottom distance between positive
-        # bbox center and gt side
-        l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
-        t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
-        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
-        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
-        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
-        is_pos = is_pos & is_in_gts
-
-        # if an anchor box is assigned to multiple gts,
-        # the one with the highest IoU will be selected.
-        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
-        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
-        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
-        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
-
-        max_overlaps = overlaps_inf.max(axis=1)
-        argmax_overlaps = overlaps_inf.argmax(axis=1)
-        assigned_gt_inds[max_overlaps !=
-                         -np.inf] = argmax_overlaps[max_overlaps !=
-                                                    -np.inf] + 1
-
-        return assigned_gt_inds, max_overlaps

+ 4 - 132
paddlex/ppdet/data/transform/batch_operators.py

@@ -22,11 +22,9 @@ except Exception:
     from collections import Sequence
 
 import cv2
-import math
 import numpy as np
 from .operators import register_op, BaseOperator, Resize
 from .op_helper import jaccard_overlap, gaussian2D
-from .atss_assigner import ATSSAssigner
 from scipy import ndimage
 
 from paddlex.ppdet.modeling import bbox_utils
@@ -35,8 +33,7 @@ logger = setup_logger(__name__)
 
 __all__ = [
     'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
-    'Gt2TTFTarget', 'Gt2Solov2Target', 'Gt2SparseRCNNTarget', 'PadMaskBatch',
-    'Gt2GFLTarget'
+    'Gt2TTFTarget', 'Gt2Solov2Target', 'Gt2SparseRCNNTarget', 'PadMaskBatch'
 ]
 
 
@@ -181,6 +178,8 @@ class Gt2YoloTarget(BaseOperator):
         h, w = samples[0]['image'].shape[1:3]
         an_hw = np.array(self.anchors) / np.array([[w, h]])
         for sample in samples:
+            # im, gt_bbox, gt_class, gt_score = sample
+            im = sample['image']
             gt_bbox = sample['gt_bbox']
             gt_class = sample['gt_class']
             if 'gt_score' not in sample:
@@ -371,6 +370,7 @@ class Gt2FCOSTarget(BaseOperator):
             "object_sizes_of_interest', and 'downsample_ratios' should have same length."
 
         for sample in samples:
+            # im, gt_bbox, gt_class, gt_score = sample
             im = sample['image']
             bboxes = sample['gt_bbox']
             gt_class = sample['gt_class']
@@ -472,134 +472,6 @@ class Gt2FCOSTarget(BaseOperator):
 
 
 @register_op
-class Gt2GFLTarget(BaseOperator):
-    """
-    Generate GFocal loss targets by groud truth data
-    """
-
-    def __init__(self,
-                 num_classes=80,
-                 downsample_ratios=[8, 16, 32, 64, 128],
-                 grid_cell_scale=4,
-                 cell_offset=0):
-        super(Gt2GFLTarget, self).__init__()
-        self.num_classes = num_classes
-        self.downsample_ratios = downsample_ratios
-        self.grid_cell_scale = grid_cell_scale
-        self.cell_offset = cell_offset
-
-        self.assigner = ATSSAssigner()
-
-    def get_grid_cells(self, featmap_size, scale, stride, offset=0):
-        """
-        Generate grid cells of a feature map for target assignment.
-        Args:
-            featmap_size: Size of a single level feature map.
-            scale: Grid cell scale.
-            stride: Down sample stride of the feature map.
-            offset: Offset of grid cells.
-        return:
-            Grid_cells xyxy position. Size should be [feat_w * feat_h, 4]
-        """
-        cell_size = stride * scale
-        h, w = featmap_size
-        x_range = (np.arange(w, dtype=np.float32) + offset) * stride
-        y_range = (np.arange(h, dtype=np.float32) + offset) * stride
-        x, y = np.meshgrid(x_range, y_range)
-        y = y.flatten()
-        x = x.flatten()
-        grid_cells = np.stack(
-            [
-                x - 0.5 * cell_size, y - 0.5 * cell_size, x + 0.5 * cell_size,
-                y + 0.5 * cell_size
-            ],
-            axis=-1)
-        return grid_cells
-
-    def get_sample(self, assign_gt_inds, gt_bboxes):
-        pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
-        neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
-        pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
-
-        if gt_bboxes.size == 0:
-            # hack for index error case
-            assert pos_assigned_gt_inds.size == 0
-            pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
-        else:
-            if len(gt_bboxes.shape) < 2:
-                gt_bboxes = gt_bboxes.resize(-1, 4)
-            pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
-        return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
-
-    def __call__(self, samples, context=None):
-        assert len(samples) > 0
-        batch_size = len(samples)
-        # get grid cells of image
-        h, w = samples[0]['image'].shape[1:3]
-        multi_level_grid_cells = []
-        for stride in self.downsample_ratios:
-            featmap_size = (int(math.ceil(h / stride)),
-                            int(math.ceil(w / stride)))
-            multi_level_grid_cells.append(
-                self.get_grid_cells(featmap_size, self.grid_cell_scale, stride,
-                                    self.cell_offset))
-        mlvl_grid_cells_list = [
-            multi_level_grid_cells for i in range(batch_size)
-        ]
-        # pixel cell number of multi-level feature maps
-        num_level_cells = [
-            grid_cells.shape[0] for grid_cells in mlvl_grid_cells_list[0]
-        ]
-        num_level_cells_list = [num_level_cells] * batch_size
-        # concat all level cells and to a single array
-        for i in range(batch_size):
-            mlvl_grid_cells_list[i] = np.concatenate(mlvl_grid_cells_list[i])
-        # target assign on all images
-        for sample, grid_cells, num_level_cells in zip(
-                samples, mlvl_grid_cells_list, num_level_cells_list):
-            gt_bboxes = sample['gt_bbox']
-            gt_labels = sample['gt_class'].squeeze()
-            if gt_labels.size == 1:
-                gt_labels = np.array([gt_labels]).astype(np.int32)
-            gt_bboxes_ignore = None
-            assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
-                                              gt_bboxes, gt_bboxes_ignore,
-                                              gt_labels)
-            pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
-                assign_gt_inds, gt_bboxes)
-
-            num_cells = grid_cells.shape[0]
-            bbox_targets = np.zeros_like(grid_cells)
-            bbox_weights = np.zeros_like(grid_cells)
-            labels = np.ones([num_cells], dtype=np.int64) * self.num_classes
-            label_weights = np.zeros([num_cells], dtype=np.float32)
-
-            if len(pos_inds) > 0:
-                pos_bbox_targets = pos_gt_bboxes
-                bbox_targets[pos_inds, :] = pos_bbox_targets
-                bbox_weights[pos_inds, :] = 1.0
-                if not np.any(gt_labels):
-                    labels[pos_inds] = 0
-                else:
-                    labels[pos_inds] = gt_labels[pos_assigned_gt_inds]
-
-                label_weights[pos_inds] = 1.0
-            if len(neg_inds) > 0:
-                label_weights[neg_inds] = 1.0
-            sample['grid_cells'] = grid_cells
-            sample['labels'] = labels
-            sample['label_weights'] = label_weights
-            sample['bbox_targets'] = bbox_targets
-            sample['pos_num'] = max(pos_inds.size, 1)
-            sample.pop('is_crowd', None)
-            sample.pop('difficult', None)
-            sample.pop('gt_class', None)
-            sample.pop('gt_bbox', None)
-            sample.pop('gt_score', None)
-        return samples
-
-
-@register_op
 class Gt2TTFTarget(BaseOperator):
     __shared__ = ['num_classes']
     """

+ 4 - 6
paddlex/ppdet/engine/export_utils.py

@@ -42,8 +42,6 @@ TRT_MIN_SUBGRAPH = {
     'DeepSORT': 3,
     'JDE': 10,
     'FairMOT': 5,
-    'GFL': 16,
-    'PicoDet': 3,
 }
 
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
@@ -118,11 +116,11 @@ def _dump_infer_config(config, path, image_shape, model):
             break
     if not arch_state:
         logger.error(
-            'Architecture: {} is not supported for exporting model now.\n'.
-            format(infer_arch) +
-            'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
+            'Architecture: {} is not supported for exporting model now'.format(
+                infer_arch))
         os._exit(0)
-    if 'Mask' in infer_arch:
+    if 'mask_head' in config[config['architecture']] and config[config[
+            'architecture']]['mask_head']:
         infer_cfg['mask'] = True
     label_arch = 'detection_arch'
     if infer_arch in KEYPOINT_ARCH:

+ 2 - 2
paddlex/ppdet/engine/tracker.py

@@ -333,7 +333,7 @@ class Tracker(object):
             if save_videos:
                 output_video_path = os.path.join(save_dir, '..',
                                                  '{}_vis.mp4'.format(seq))
-                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(
+                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                     save_dir, output_video_path)
                 os.system(cmd_str)
                 logger.info('Save video in {}.'.format(output_video_path))
@@ -451,7 +451,7 @@ class Tracker(object):
         if save_videos:
             output_video_path = os.path.join(save_dir, '..',
                                              '{}_vis.mp4'.format(seq))
-            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(
+            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                 save_dir, output_video_path)
             os.system(cmd_str)
             logger.info('Save video in {}'.format(output_video_path))

+ 11 - 14
paddlex/ppdet/engine/trainer.py

@@ -228,27 +228,19 @@ class Trainer(object):
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset.check_or_download_dataset()
             anno_file = eval_dataset.get_anno()
-            save_prediction_only = self.cfg.get('save_prediction_only', False)
             self._metrics = [
-                KeyPointTopDownCOCOEval(
-                    anno_file,
-                    len(eval_dataset),
-                    self.cfg.num_joints,
-                    self.cfg.save_dir,
-                    save_prediction_only=save_prediction_only)
+                KeyPointTopDownCOCOEval(anno_file,
+                                        len(eval_dataset), self.cfg.num_joints,
+                                        self.cfg.save_dir)
             ]
         elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset.check_or_download_dataset()
             anno_file = eval_dataset.get_anno()
-            save_prediction_only = self.cfg.get('save_prediction_only', False)
             self._metrics = [
-                KeyPointTopDownMPIIEval(
-                    anno_file,
-                    len(eval_dataset),
-                    self.cfg.num_joints,
-                    self.cfg.save_dir,
-                    save_prediction_only=save_prediction_only)
+                KeyPointTopDownMPIIEval(anno_file,
+                                        len(eval_dataset), self.cfg.num_joints,
+                                        self.cfg.save_dir)
             ]
         elif self.cfg.metric == 'MOTDet':
             self._metrics = [JDEDetMetric(), ]
@@ -303,6 +295,11 @@ class Trainer(object):
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False
 
+        # if validation in training is enabled, metrics should be re-init
+        if validate:
+            self._init_metrics(validate=validate)
+            self._reset_metrics()
+
         model = self.model
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)

+ 2 - 39
paddlex/ppdet/metrics/keypoint_metrics.py

@@ -20,8 +20,6 @@ from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 from ..modeling.keypoint_utils import oks_nms
 from scipy.io import loadmat, savemat
-from paddlex.ppdet.utils.logger import setup_logger
-logger = setup_logger(__name__)
 
 __all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
 
@@ -40,8 +38,7 @@ class KeyPointTopDownCOCOEval(object):
                  output_eval,
                  iou_type='keypoints',
                  in_vis_thre=0.2,
-                 oks_thre=0.9,
-                 save_prediction_only=False):
+                 oks_thre=0.9):
         super(KeyPointTopDownCOCOEval, self).__init__()
         self.coco = COCO(anno_file)
         self.num_samples = num_samples
@@ -51,7 +48,6 @@ class KeyPointTopDownCOCOEval(object):
         self.oks_thre = oks_thre
         self.output_eval = output_eval
         self.res_file = os.path.join(output_eval, "keypoints_results.json")
-        self.save_prediction_only = save_prediction_only
         self.reset()
 
     def reset(self):
@@ -94,7 +90,6 @@ class KeyPointTopDownCOCOEval(object):
             os.makedirs(self.output_eval)
         with open(self.res_file, 'w') as f:
             json.dump(results, f, sort_keys=True, indent=4)
-            logger.info(f'The keypoint result is saved to {self.res_file}.')
         try:
             json.load(open(self.res_file))
         except Exception:
@@ -183,10 +178,6 @@ class KeyPointTopDownCOCOEval(object):
         self.get_final_results(self.results['all_preds'],
                                self.results['all_boxes'],
                                self.results['image_path'])
-        if self.save_prediction_only:
-            logger.info(f'The keypoint result is saved to {self.res_file} '
-                        'and do not evaluate the mAP.')
-            return
         coco_dt = self.coco.loadRes(self.res_file)
         coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
         coco_eval.params.useSegm = None
@@ -200,8 +191,6 @@ class KeyPointTopDownCOCOEval(object):
         self.eval_results['keypoint'] = keypoint_stats
 
     def log(self):
-        if self.save_prediction_only:
-            return
         stats_names = [
             'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
             'AR .75', 'AR (M)', 'AR (L)'
@@ -224,12 +213,9 @@ class KeyPointTopDownMPIIEval(object):
                  num_samples,
                  num_joints,
                  output_eval,
-                 oks_thre=0.9,
-                 save_prediction_only=False):
+                 oks_thre=0.9):
         super(KeyPointTopDownMPIIEval, self).__init__()
         self.ann_file = anno_file
-        self.res_file = os.path.join(output_eval, "keypoints_results.json")
-        self.save_prediction_only = save_prediction_only
         self.reset()
 
     def reset(self):
@@ -253,32 +239,9 @@ class KeyPointTopDownMPIIEval(object):
         self.results.append(results)
 
     def accumulate(self):
-        self._mpii_keypoint_results_save()
-        if self.save_prediction_only:
-            logger.info(f'The keypoint result is saved to {self.res_file} '
-                        'and do not evaluate the mAP.')
-            return
-
         self.eval_results = self.evaluate(self.results)
 
-    def _mpii_keypoint_results_save(self):
-        results = []
-        for res in self.results:
-            if len(res) == 0:
-                continue
-            result = [{
-                'preds': res['preds'][k].tolist(),
-                'boxes': res['boxes'][k].tolist(),
-                'image_path': res['image_path'][k],
-            } for k in range(len(res))]
-            results.extend(result)
-        with open(self.res_file, 'w') as f:
-            json.dump(results, f, sort_keys=True, indent=4)
-            logger.info(f'The keypoint result is saved to {self.res_file}.')
-
     def log(self):
-        if self.save_prediction_only:
-            return
         for item, value in self.eval_results.items():
             print("{} : {}".format(item, value))
 

+ 0 - 192
paddlex/ppdet/metrics/mot_eval_utils.py

@@ -1,192 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import numpy as np
-import copy
-import motmetrics as mm
-mm.lap.default_solver = 'lap'
-
-__all__ = [
-    'read_mot_results',
-    'unzip_objs',
-    'MOTEvaluator',
-]
-
-
-def read_mot_results(filename, is_gt=False, is_ignore=False):
-    valid_labels = {1}
-    ignore_labels = {2, 7, 8, 12}
-    results_dict = dict()
-    if os.path.isfile(filename):
-        with open(filename, 'r') as f:
-            for line in f.readlines():
-                linelist = line.split(',')
-                if len(linelist) < 7:
-                    continue
-                fid = int(linelist[0])
-                if fid < 1:
-                    continue
-                results_dict.setdefault(fid, list())
-
-                box_size = float(linelist[4]) * float(linelist[5])
-
-                if is_gt:
-                    if 'MOT16-' in filename or 'MOT17-' in filename:
-                        label = int(float(linelist[7]))
-                        mark = int(float(linelist[6]))
-                        if mark == 0 or label not in valid_labels:
-                            continue
-                    score = 1
-                elif is_ignore:
-                    if 'MOT16-' in filename or 'MOT17-' in filename:
-                        label = int(float(linelist[7]))
-                        vis_ratio = float(linelist[8])
-                        if label not in ignore_labels and vis_ratio >= 0:
-                            continue
-                    else:
-                        continue
-                    score = 1
-                else:
-                    score = float(linelist[6])
-
-                tlwh = tuple(map(float, linelist[2:6]))
-                target_id = int(linelist[1])
-
-                results_dict[fid].append((tlwh, target_id, score))
-    return results_dict
-
-
-"""
-labels={'ped', ...			    % 1
-        'person_on_vhcl', ...	% 2
-        'car', ...				% 3
-        'bicycle', ...			% 4
-        'mbike', ...			% 5
-        'non_mot_vhcl', ...		% 6
-        'static_person', ...	% 7
-        'distractor', ...		% 8
-        'occluder', ...			% 9
-        'occluder_on_grnd', ...	% 10
-        'occluder_full', ...	% 11
-        'reflection', ...		% 12
-        'crowd' ...			    % 13
-};
-"""
-
-
-def unzip_objs(objs):
-    if len(objs) > 0:
-        tlwhs, ids, scores = zip(*objs)
-    else:
-        tlwhs, ids, scores = [], [], []
-    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
-    return tlwhs, ids, scores
-
-
-class MOTEvaluator(object):
-    def __init__(self, data_root, seq_name, data_type):
-        self.data_root = data_root
-        self.seq_name = seq_name
-        self.data_type = data_type
-
-        self.load_annotations()
-        self.reset_accumulator()
-
-    def load_annotations(self):
-        assert self.data_type == 'mot'
-        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
-                                   'gt.txt')
-        self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
-        self.gt_ignore_frame_dict = read_mot_results(
-            gt_filename, is_ignore=True)
-
-    def reset_accumulator(self):
-        self.acc = mm.MOTAccumulator(auto_id=True)
-
-    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
-        # results
-        trk_tlwhs = np.copy(trk_tlwhs)
-        trk_ids = np.copy(trk_ids)
-
-        # gts
-        gt_objs = self.gt_frame_dict.get(frame_id, [])
-        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
-
-        # ignore boxes
-        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
-        ignore_tlwhs = unzip_objs(ignore_objs)[0]
-
-        # remove ignored results
-        keep = np.ones(len(trk_tlwhs), dtype=bool)
-        iou_distance = mm.distances.iou_matrix(
-            ignore_tlwhs, trk_tlwhs, max_iou=0.5)
-        if len(iou_distance) > 0:
-            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
-            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
-            match_ious = iou_distance[match_is, match_js]
-
-            match_js = np.asarray(match_js, dtype=int)
-            match_js = match_js[np.logical_not(np.isnan(match_ious))]
-            keep[match_js] = False
-            trk_tlwhs = trk_tlwhs[keep]
-            trk_ids = trk_ids[keep]
-
-        # get distance matrix
-        iou_distance = mm.distances.iou_matrix(
-            gt_tlwhs, trk_tlwhs, max_iou=0.5)
-
-        # acc
-        self.acc.update(gt_ids, trk_ids, iou_distance)
-
-        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
-                                                            'last_mot_events'):
-            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
-        else:
-            events = None
-        return events
-
-    def eval_file(self, filename):
-        self.reset_accumulator()
-
-        result_frame_dict = read_mot_results(filename, is_gt=False)
-        frames = sorted(list(set(result_frame_dict.keys())))
-        for frame_id in frames:
-            trk_objs = result_frame_dict.get(frame_id, [])
-            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
-            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
-
-        return self.acc
-
-    @staticmethod
-    def get_summary(accs,
-                    names,
-                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
-                             'precision', 'recall')):
-        names = copy.deepcopy(names)
-        if metrics is None:
-            metrics = mm.metrics.motchallenge_metrics
-        metrics = copy.deepcopy(metrics)
-
-        mh = mm.metrics.create()
-        summary = mh.compute_many(
-            accs, metrics=metrics, names=names, generate_overall=True)
-        return summary
-
-    @staticmethod
-    def save_summary(summary, filename):
-        import pandas as pd
-        writer = pd.ExcelWriter(filename)
-        summary.to_excel(writer)
-        writer.save()

+ 2 - 2
paddlex/ppdet/metrics/mot_metrics.py

@@ -539,7 +539,7 @@ class KITTIEvaluation(object):
                         return
 
                 # do not consider objects marked as invalid
-                if t_data.track_id is -1 and t_data.obj_type != "dontcare":
+                if t_data.track_id == -1 and t_data.obj_type != "dontcare":
                     continue
 
                 idx = t_data.frame
@@ -718,7 +718,7 @@ class KITTIEvaluation(object):
                     seq_trajectories[gg.track_id].append(-1)
                     seq_ignored[gg.track_id].append(False)
 
-                if len(g) is 0:
+                if len(g) == 0:
                     cost_matrix = [[]]
                 # associate
                 association_matrix = hm.compute(cost_matrix)

+ 0 - 4
paddlex/ppdet/modeling/architectures/__init__.py

@@ -21,8 +21,6 @@ from . import jde
 from . import deepsort
 from . import fairmot
 from . import centernet
-from . import gfl
-from . import picodet
 from . import detr
 from . import sparse_rcnn
 
@@ -43,7 +41,5 @@ from .deepsort import *
 from .fairmot import *
 from .centernet import *
 from .blazeface import *
-from .gfl import *
-from .picodet import *
 from .detr import *
 from .sparse_rcnn import *

+ 0 - 87
paddlex/ppdet/modeling/architectures/gfl.py

@@ -1,87 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddlex.ppdet.core.workspace import register, create
-from .meta_arch import BaseArch
-
-__all__ = ['GFL']
-
-
-@register
-class GFL(BaseArch):
-    """
-    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
-
-    Args:
-        backbone (object): backbone instance
-        neck (object): 'FPN' instance
-        head (object): 'GFLHead' instance
-    """
-
-    __category__ = 'architecture'
-
-    def __init__(self, backbone, neck, head='GFLHead'):
-        super(GFL, self).__init__()
-        self.backbone = backbone
-        self.neck = neck
-        self.head = head
-
-    @classmethod
-    def from_config(cls, cfg, *args, **kwargs):
-        backbone = create(cfg['backbone'])
-
-        kwargs = {'input_shape': backbone.out_shape}
-        neck = create(cfg['neck'], **kwargs)
-
-        kwargs = {'input_shape': neck.out_shape}
-        head = create(cfg['head'], **kwargs)
-
-        return {
-            'backbone': backbone,
-            'neck': neck,
-            "head": head,
-        }
-
-    def _forward(self):
-        body_feats = self.backbone(self.inputs)
-        fpn_feats = self.neck(body_feats)
-        head_outs = self.head(fpn_feats)
-        if not self.training:
-            im_shape = self.inputs['im_shape']
-            scale_factor = self.inputs['scale_factor']
-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
-                                                      scale_factor)
-            return bboxes, bbox_num
-        else:
-            return head_outs
-
-    def get_loss(self, ):
-        loss = {}
-
-        head_outs = self._forward()
-        loss_gfl = self.head.get_loss(head_outs, self.inputs)
-        loss.update(loss_gfl)
-        total_loss = paddle.add_n(list(loss.values()))
-        loss.update({'loss': total_loss})
-        return loss
-
-    def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
-        return output

+ 4 - 5
paddlex/ppdet/modeling/architectures/keypoint_hrnet.py

@@ -41,20 +41,18 @@ class TopDownHRNet(BaseArch):
                  post_process='HRNetPostProcess',
                  flip_perm=None,
                  flip=True,
-                 shift_heatmap=True,
-                 use_dark=True):
+                 shift_heatmap=True):
         """
-        HRNet network, see https://arxiv.org/abs/1902.09212
+        HRNnet network, see https://arxiv.org/abs/1902.09212
 
         Args:
             backbone (nn.Layer): backbone instance
             post_process (object): `HRNetPostProcess` instance
             flip_perm (list): The left-right joints exchange order list
-            use_dark(bool): Whether to use DARK in post processing
         """
         super(TopDownHRNet, self).__init__()
         self.backbone = backbone
-        self.post_process = HRNetPostProcess(use_dark)
+        self.post_process = HRNetPostProcess()
         self.loss = loss
         self.flip_perm = flip_perm
         self.flip = flip
@@ -220,6 +218,7 @@ class HRNetPostProcess(object):
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
         """
+
         coords, maxvals = self.get_max_preds(heatmaps)
 
         heatmap_height = heatmaps.shape[2]

+ 0 - 91
paddlex/ppdet/modeling/architectures/picodet.py

@@ -1,91 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddlex.ppdet.core.workspace import register, create
-from .meta_arch import BaseArch
-
-__all__ = ['PicoDet']
-
-
-@register
-class PicoDet(BaseArch):
-    """
-    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
-
-    Args:
-        backbone (object): backbone instance
-        neck (object): 'FPN' instance
-        head (object): 'PicoHead' instance
-    """
-
-    __category__ = 'architecture'
-
-    def __init__(self, backbone, neck, head='PicoHead'):
-        super(PicoDet, self).__init__()
-        self.backbone = backbone
-        self.neck = neck
-        self.head = head
-        self.deploy = False
-
-    @classmethod
-    def from_config(cls, cfg, *args, **kwargs):
-        backbone = create(cfg['backbone'])
-
-        kwargs = {'input_shape': backbone.out_shape}
-        neck = create(cfg['neck'], **kwargs)
-
-        kwargs = {'input_shape': neck.out_shape}
-        head = create(cfg['head'], **kwargs)
-
-        return {
-            'backbone': backbone,
-            'neck': neck,
-            "head": head,
-        }
-
-    def _forward(self):
-        body_feats = self.backbone(self.inputs)
-        fpn_feats = self.neck(body_feats)
-        head_outs = self.head(fpn_feats)
-        if self.training or self.deploy:
-            return head_outs
-        else:
-            im_shape = self.inputs['im_shape']
-            scale_factor = self.inputs['scale_factor']
-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
-                                                      scale_factor)
-            return bboxes, bbox_num
-
-    def get_loss(self, ):
-        loss = {}
-
-        head_outs = self._forward()
-        loss_gfl = self.head.get_loss(head_outs, self.inputs)
-        loss.update(loss_gfl)
-        total_loss = paddle.add_n(list(loss.values()))
-        loss.update({'loss': total_loss})
-        return loss
-
-    def get_pred(self):
-        if self.deploy:
-            return {'picodet': self._forward()[0]}
-        else:
-            bbox_pred, bbox_num = self._forward()
-            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
-            return output

+ 0 - 4
paddlex/ppdet/modeling/backbones/__init__.py

@@ -18,13 +18,11 @@ from . import darknet
 from . import mobilenet_v1
 from . import mobilenet_v3
 from . import hrnet
-from . import lite_hrnet
 from . import blazenet
 from . import ghostnet
 from . import senet
 from . import res2net
 from . import dla
-from . import shufflenet_v2
 
 from .vgg import *
 from .resnet import *
@@ -32,10 +30,8 @@ from .darknet import *
 from .mobilenet_v1 import *
 from .mobilenet_v3 import *
 from .hrnet import *
-from .lite_hrnet import *
 from .blazenet import *
 from .ghostnet import *
 from .senet import *
 from .res2net import *
 from .dla import *
-from .shufflenet_v2 import *

+ 14 - 3
paddlex/ppdet/modeling/backbones/blazenet.py

@@ -55,14 +55,25 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             groups=num_groups,
             weight_attr=ParamAttr(
-                learning_rate=conv_lr, initializer=KaimingNormal()),
+                learning_rate=conv_lr,
+                initializer=KaimingNormal(),
+                name=name + "_weights"),
             bias_attr=False)
 
+        param_attr = ParamAttr(name=name + "_bn_scale")
+        bias_attr = ParamAttr(name=name + "_bn_offset")
         if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
+            self._batch_norm = nn.SyncBatchNorm(
+                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
         else:
             self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+                out_channels,
+                act=None,
+                param_attr=param_attr,
+                bias_attr=bias_attr,
+                use_global_stats=False,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
 
     def forward(self, x):
         x = self._conv(x)

+ 10 - 4
paddlex/ppdet/modeling/backbones/ghostnet.py

@@ -100,15 +100,21 @@ class SEBlock(nn.Layer):
             num_channels,
             med_ch,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
-            bias_attr=ParamAttr(learning_rate=lr_mult))
+                learning_rate=lr_mult,
+                initializer=Uniform(-stdv, stdv),
+                name=name + "_1_weights"),
+            bias_attr=ParamAttr(
+                learning_rate=lr_mult, name=name + "_1_offset"))
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         self.excitation = Linear(
             med_ch,
             num_channels,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
-            bias_attr=ParamAttr(learning_rate=lr_mult))
+                learning_rate=lr_mult,
+                initializer=Uniform(-stdv, stdv),
+                name=name + "_2_weights"),
+            bias_attr=ParamAttr(
+                learning_rate=lr_mult, name=name + "_2_offset"))
 
     def forward(self, inputs):
         pool = self.pool2d_gap(inputs)

+ 19 - 7
paddlex/ppdet/modeling/backbones/hrnet.py

@@ -52,23 +52,31 @@ class ConvNormLayer(nn.Layer):
             stride=stride,
             padding=(filter_size - 1) // 2,
             groups=1,
-            weight_attr=ParamAttr(initializer=Normal(
-                mean=0., std=0.01)),
+            weight_attr=ParamAttr(
+                name=name + "_weights", initializer=Normal(
+                    mean=0., std=0.01)),
             bias_attr=False)
 
         norm_lr = 0. if freeze_norm else 1.
 
+        norm_name = name + '_bn'
         param_attr = ParamAttr(
-            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            name=norm_name + "_scale",
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
-            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            name=norm_name + "_offset",
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay))
         global_stats = True if freeze_norm else False
         if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm(
                 ch_out,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                use_global_stats=global_stats)
+                use_global_stats=global_stats,
+                moving_mean_name=norm_name + '_mean',
+                moving_variance_name=norm_name + '_variance')
         elif norm_type == 'gn':
             self.norm = nn.GroupNorm(
                 num_groups=norm_groups,
@@ -368,13 +376,17 @@ class SELayer(nn.Layer):
         self.squeeze = Linear(
             num_channels,
             med_ch,
-            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
 
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         self.excitation = Linear(
             med_ch,
             num_filters,
-            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
 
     def forward(self, input):
         pool = self.pool2d_gap(input)

+ 0 - 886
paddlex/ppdet/modeling/backbones/lite_hrnet.py

@@ -1,886 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-
-from numbers import Integral
-from paddle import ParamAttr
-from paddle.regularizer import L2Decay
-from paddle.nn.initializer import Normal, Constant
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.shape_spec import ShapeSpec
-from paddlex.ppdet.modeling.ops import channel_shuffle
-from .. import layers as L
-
-__all__ = ['LiteHRNet']
-
-
-class ConvNormLayer(nn.Layer):
-    def __init__(self,
-                 ch_in,
-                 ch_out,
-                 filter_size,
-                 stride=1,
-                 groups=1,
-                 norm_type=None,
-                 norm_groups=32,
-                 norm_decay=0.,
-                 freeze_norm=False,
-                 act=None):
-        super(ConvNormLayer, self).__init__()
-        self.act = act
-        norm_lr = 0. if freeze_norm else 1.
-        if norm_type is not None:
-            assert (norm_type in [
-                'bn', 'sync_bn', 'gn'
-            ], "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".
-                    format(norm_type))
-            param_attr = ParamAttr(
-                initializer=Constant(1.0),
-                learning_rate=norm_lr,
-                regularizer=L2Decay(norm_decay), )
-            bias_attr = ParamAttr(
-                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-            global_stats = True if freeze_norm else False
-            if norm_type in ['bn', 'sync_bn']:
-                self.norm = nn.BatchNorm(
-                    ch_out,
-                    param_attr=param_attr,
-                    bias_attr=bias_attr,
-                    use_global_stats=global_stats, )
-            elif norm_type == 'gn':
-                self.norm = nn.GroupNorm(
-                    num_groups=norm_groups,
-                    num_channels=ch_out,
-                    weight_attr=param_attr,
-                    bias_attr=bias_attr)
-            norm_params = self.norm.parameters()
-            if freeze_norm:
-                for param in norm_params:
-                    param.stop_gradient = True
-            conv_bias_attr = False
-        else:
-            conv_bias_attr = True
-            self.norm = None
-
-        self.conv = nn.Conv2D(
-            in_channels=ch_in,
-            out_channels=ch_out,
-            kernel_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            weight_attr=ParamAttr(initializer=Normal(
-                mean=0., std=0.001)),
-            bias_attr=conv_bias_attr)
-
-    def forward(self, inputs):
-        out = self.conv(inputs)
-        if self.norm is not None:
-            out = self.norm(out)
-
-        if self.act == 'relu':
-            out = F.relu(out)
-        elif self.act == 'sigmoid':
-            out = F.sigmoid(out)
-        return out
-
-
-class DepthWiseSeparableConvNormLayer(nn.Layer):
-    def __init__(self,
-                 ch_in,
-                 ch_out,
-                 filter_size,
-                 stride=1,
-                 dw_norm_type=None,
-                 pw_norm_type=None,
-                 norm_decay=0.,
-                 freeze_norm=False,
-                 dw_act=None,
-                 pw_act=None):
-        super(DepthWiseSeparableConvNormLayer, self).__init__()
-        self.depthwise_conv = ConvNormLayer(
-            ch_in=ch_in,
-            ch_out=ch_in,
-            filter_size=filter_size,
-            stride=stride,
-            groups=ch_in,
-            norm_type=dw_norm_type,
-            act=dw_act,
-            norm_decay=norm_decay,
-            freeze_norm=freeze_norm, )
-        self.pointwise_conv = ConvNormLayer(
-            ch_in=ch_in,
-            ch_out=ch_out,
-            filter_size=1,
-            stride=1,
-            norm_type=pw_norm_type,
-            act=pw_act,
-            norm_decay=norm_decay,
-            freeze_norm=freeze_norm, )
-
-    def forward(self, x):
-        x = self.depthwise_conv(x)
-        x = self.pointwise_conv(x)
-        return x
-
-
-class CrossResolutionWeightingModule(nn.Layer):
-    def __init__(self,
-                 channels,
-                 ratio=16,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(CrossResolutionWeightingModule, self).__init__()
-        self.channels = channels
-        total_channel = sum(channels)
-        self.conv1 = ConvNormLayer(
-            ch_in=total_channel,
-            ch_out=total_channel // ratio,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.conv2 = ConvNormLayer(
-            ch_in=total_channel // ratio,
-            ch_out=total_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='sigmoid',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        mini_size = x[-1].shape[-2:]
-        out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
-        out = paddle.concat(out, 1)
-        out = self.conv1(out)
-        out = self.conv2(out)
-        out = paddle.split(out, self.channels, 1)
-        out = [
-            s * F.interpolate(
-                a, s.shape[-2:], mode='nearest') for s, a in zip(x, out)
-        ]
-        return out
-
-
-class SpatialWeightingModule(nn.Layer):
-    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
-        super(SpatialWeightingModule, self).__init__()
-        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
-        self.conv1 = ConvNormLayer(
-            ch_in=in_channel,
-            ch_out=in_channel // ratio,
-            filter_size=1,
-            stride=1,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.conv2 = ConvNormLayer(
-            ch_in=in_channel // ratio,
-            ch_out=in_channel,
-            filter_size=1,
-            stride=1,
-            act='sigmoid',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        out = self.global_avgpooling(x)
-        out = self.conv1(out)
-        out = self.conv2(out)
-        return x * out
-
-
-class ConditionalChannelWeightingBlock(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 stride,
-                 reduce_ratio,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(ConditionalChannelWeightingBlock, self).__init__()
-        assert stride in [1, 2]
-        branch_channels = [channel // 2 for channel in in_channels]
-
-        self.cross_resolution_weighting = CrossResolutionWeightingModule(
-            branch_channels,
-            ratio=reduce_ratio,
-            norm_type=norm_type,
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.depthwise_convs = nn.LayerList([
-            ConvNormLayer(
-                channel,
-                channel,
-                filter_size=3,
-                stride=stride,
-                groups=channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay) for channel in branch_channels
-        ])
-
-        self.spatial_weighting = nn.LayerList([
-            SpatialWeightingModule(
-                channel,
-                ratio=4,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay) for channel in branch_channels
-        ])
-
-    def forward(self, x):
-        x = [s.chunk(2, axis=1) for s in x]
-        x1 = [s[0] for s in x]
-        x2 = [s[1] for s in x]
-
-        x2 = self.cross_resolution_weighting(x2)
-        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
-        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
-
-        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
-        out = [channel_shuffle(s, groups=2) for s in out]
-        return out
-
-
-class ShuffleUnit(nn.Layer):
-    def __init__(self,
-                 in_channel,
-                 out_channel,
-                 stride,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(ShuffleUnit, self).__init__()
-        branch_channel = out_channel // 2
-        stride = self.stride
-        if self.stride == 1:
-            assert (
-                in_channel == branch_channel * 2,
-                "when stride=1, in_channel {} should equal to branch_channel*2 {}"
-                .format(in_channel, branch_channel * 2))
-        if stride > 1:
-            self.branch1 = nn.Sequential(
-                ConvNormLayer(
-                    ch_in=in_channel,
-                    ch_out=in_channel,
-                    filter_size=3,
-                    stride=self.stride,
-                    groups=in_channel,
-                    norm_type=norm_type,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay),
-                ConvNormLayer(
-                    ch_in=in_channel,
-                    ch_out=branch_channel,
-                    filter_size=1,
-                    stride=1,
-                    norm_type=norm_type,
-                    act='relu',
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay), )
-        self.branch2 = nn.Sequential(
-            ConvNormLayer(
-                ch_in=branch_channel if stride == 1 else in_channel,
-                ch_out=branch_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=3,
-                stride=self.stride,
-                groups=branch_channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay), )
-
-    def forward(self, x):
-        if self.stride > 1:
-            x1 = self.branch1(x)
-            x2 = self.branch2(x)
-        else:
-            x1, x2 = x.chunk(2, axis=1)
-            x2 = self.branch2(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        out = channel_shuffle(out, groups=2)
-        return out
-
-
-class IterativeHead(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(IterativeHead, self).__init__()
-        num_branches = len(in_channels)
-        self.in_channels = in_channels[::-1]
-
-        projects = []
-        for i in range(num_branches):
-            if i != num_branches - 1:
-                projects.append(
-                    DepthWiseSeparableConvNormLayer(
-                        ch_in=self.in_channels[i],
-                        ch_out=self.in_channels[i + 1],
-                        filter_size=3,
-                        stride=1,
-                        dw_act=None,
-                        pw_act='relu',
-                        dw_norm_type=norm_type,
-                        pw_norm_type=norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-            else:
-                projects.append(
-                    DepthWiseSeparableConvNormLayer(
-                        ch_in=self.in_channels[i],
-                        ch_out=self.in_channels[i],
-                        filter_size=3,
-                        stride=1,
-                        dw_act=None,
-                        pw_act='relu',
-                        dw_norm_type=norm_type,
-                        pw_norm_type=norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-        self.projects = nn.LayerList(projects)
-
-    def forward(self, x):
-        x = x[::-1]
-        y = []
-        last_x = None
-        for i, s in enumerate(x):
-            if last_x is not None:
-                last_x = F.interpolate(
-                    last_x,
-                    size=s.shape[-2:],
-                    mode='bilinear',
-                    align_corners=True)
-                s = s + last_x
-            s = self.projects[i](s)
-            y.append(s)
-            last_x = s
-
-        return y[::-1]
-
-
-class Stem(nn.Layer):
-    def __init__(self,
-                 in_channel,
-                 stem_channel,
-                 out_channel,
-                 expand_ratio,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(Stem, self).__init__()
-        self.conv1 = ConvNormLayer(
-            in_channel,
-            stem_channel,
-            filter_size=3,
-            stride=2,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        mid_channel = int(round(stem_channel * expand_ratio))
-        branch_channel = stem_channel // 2
-        if stem_channel == out_channel:
-            inc_channel = out_channel - branch_channel
-        else:
-            inc_channel = out_channel - stem_channel
-        self.branch1 = nn.Sequential(
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=3,
-                stride=2,
-                groups=branch_channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=inc_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay), )
-        self.expand_conv = ConvNormLayer(
-            ch_in=branch_channel,
-            ch_out=mid_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.depthwise_conv = ConvNormLayer(
-            ch_in=mid_channel,
-            ch_out=mid_channel,
-            filter_size=3,
-            stride=2,
-            groups=mid_channel,
-            norm_type=norm_type,
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.linear_conv = ConvNormLayer(
-            ch_in=mid_channel,
-            ch_out=branch_channel
-            if stem_channel == out_channel else stem_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x1, x2 = x.chunk(2, axis=1)
-        x1 = self.branch1(x1)
-        x2 = self.expand_conv(x2)
-        x2 = self.depthwise_conv(x2)
-        x2 = self.linear_conv(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        out = channel_shuffle(out, groups=2)
-
-        return out
-
-
-class LiteHRNetModule(nn.Layer):
-    def __init__(self,
-                 num_branches,
-                 num_blocks,
-                 in_channels,
-                 reduce_ratio,
-                 module_type,
-                 multiscale_output=False,
-                 with_fuse=True,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(LiteHRNetModule, self).__init__()
-        assert (num_branches == len(in_channels),
-                "num_branches {} should equal to num_in_channels {}"
-                .format(num_branches, len(in_channels)))
-        assert (module_type in ['LITE', 'NAIVE'],
-                "module_type should be one of ['LITE', 'NAIVE']")
-        self.num_branches = num_branches
-        self.in_channels = in_channels
-        self.multiscale_output = multiscale_output
-        self.with_fuse = with_fuse
-        self.norm_type = 'bn'
-        self.module_type = module_type
-
-        if self.module_type == 'LITE':
-            self.layers = self._make_weighting_blocks(
-                num_blocks,
-                reduce_ratio,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay)
-        elif self.module_type == 'NAIVE':
-            self.layers = self._make_naive_branches(
-                num_branches,
-                num_blocks,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay)
-
-        if self.with_fuse:
-            self.fuse_layers = self._make_fuse_layers(
-                freeze_norm=freeze_norm, norm_decay=norm_decay)
-            self.relu = nn.ReLU()
-
-    def _make_weighting_blocks(self,
-                               num_blocks,
-                               reduce_ratio,
-                               stride=1,
-                               freeze_norm=False,
-                               norm_decay=0.):
-        layers = []
-        for i in range(num_blocks):
-            layers.append(
-                ConditionalChannelWeightingBlock(
-                    self.in_channels,
-                    stride=stride,
-                    reduce_ratio=reduce_ratio,
-                    norm_type=self.norm_type,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay))
-        return nn.Sequential(*layers)
-
-    def _make_naive_branchs(self,
-                            num_branches,
-                            num_blocks,
-                            freeze_norm=False,
-                            norm_decay=0.):
-        branches = []
-        for branch_idx in range(num_branches):
-            layers = []
-            for i in range(num_blocks):
-                layers.append(
-                    ShuffleUnit(
-                        self.in_channels[branch_idx],
-                        self.in_channels[branch_idx],
-                        stride=1,
-                        norm_type=self.norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-            branches.append(nn.Sequential(*layers))
-        return nn.LayerList(branches)
-
-    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
-        if self.num_branches == 1:
-            return None
-        fuse_layers = []
-        num_out_branches = self.num_branches if self.multiscale_output else 1
-        for i in range(num_out_branches):
-            fuse_layer = []
-            for j in range(self.num_branches):
-                if j > i:
-                    fuse_layer.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                self.in_channels[j],
-                                self.in_channels[i],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(self.in_channels[i]),
-                            nn.Upsample(
-                                scale_factor=2**(j - i), mode='nearest')))
-                elif j == i:
-                    fuse_layer.append(None)
-                else:
-                    conv_downsamples = []
-                    for k in range(i - j):
-                        if k == i - j - 1:
-                            conv_downsamples.append(
-                                nn.Sequential(
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=3,
-                                        stride=2,
-                                        padding=1,
-                                        groups=self.in_channels[j],
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[i],
-                                        kernel_size=1,
-                                        stride=1,
-                                        padding=0,
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[i])))
-                        else:
-                            conv_downsamples.append(
-                                nn.Sequential(
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=3,
-                                        stride=2,
-                                        padding=1,
-                                        groups=self.in_channels[j],
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=1,
-                                        stride=1,
-                                        padding=0,
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    nn.ReLU()))
-
-                    fuse_layer.append(nn.Sequential(*conv_downsamples))
-            fuse_layers.append(nn.LayerList(fuse_layer))
-
-        return nn.LayerList(fuse_layers)
-
-    def forward(self, x):
-        if self.num_branches == 1:
-            return [self.layers[0](x[0])]
-        if self.module_type == 'LITE':
-            out = self.layers(x)
-        elif self.module_type == 'NAIVE':
-            for i in range(self.num_branches):
-                x[i] = self.layers(x[i])
-            out = x
-        if self.with_fuse:
-            out_fuse = []
-            for i in range(len(self.fuse_layers)):
-                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
-                for j in range(self.num_branches):
-                    if i == j:
-                        y += out[j]
-                    else:
-                        y += self.fuse_layers[i][j](out[j])
-                    if i == 0:
-                        out[i] = y
-                out_fuse.append(self.relu(y))
-            out = out_fuse
-        elif not self.multiscale_output:
-            out = [out[0]]
-        return out
-
-
-@register
-class LiteHRNet(nn.Layer):
-    """
-    @inproceedings{Yulitehrnet21,
-    title={Lite-HRNet: A Lightweight High-Resolution Network},
-        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
-        booktitle={CVPR},year={2021}
-    }
-    Args:
-        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
-            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
-            "wider_naive": Naive network with wider channels in each block.
-            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
-            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
-        freeze_at (int): the stage to freeze
-        freeze_norm (bool): whether to freeze norm in HRNet
-        norm_decay (float): weight decay for normalization layer weights
-        return_idx (List): the stage to return
-    """
-
-    def __init__(self,
-                 network_type,
-                 freeze_at=0,
-                 freeze_norm=True,
-                 norm_decay=0.,
-                 return_idx=[0, 1, 2, 3]):
-        super(LiteHRNet, self).__init__()
-        if isinstance(return_idx, Integral):
-            return_idx = [return_idx]
-        assert (
-            network_type in ["lite_18", "lite_30", "naive", "wider_naive"],
-            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
-        )
-        assert len(return_idx) > 0, "need one or more return index"
-        self.freeze_at = freeze_at
-        self.freeze_norm = freeze_norm
-        self.norm_decay = norm_decay
-        self.return_idx = return_idx
-        self.norm_type = 'bn'
-
-        self.module_configs = {
-            "lite_18": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["LITE", "LITE", "LITE"],
-                "reduce_ratios": [8, 8, 8],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-            "lite_30": {
-                "num_modules": [3, 8, 3],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["LITE", "LITE", "LITE"],
-                "reduce_ratios": [8, 8, 8],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-            "naive": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
-                "reduce_ratios": [1, 1, 1],
-                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
-            },
-            "wider_naive": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
-                "reduce_ratios": [1, 1, 1],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-        }
-
-        self.stages_config = self.module_configs[network_type]
-
-        self.stem = Stem(3, 32, 32, 1)
-        num_channels_pre_layer = [32]
-        for stage_idx in range(3):
-            num_channels = self.stages_config["num_channels"][stage_idx]
-            setattr(self, 'transition{}'.format(stage_idx),
-                    self._make_transition_layer(num_channels_pre_layer,
-                                                num_channels, self.freeze_norm,
-                                                self.norm_decay))
-            stage, num_channels_pre_layer = self._make_stage(
-                self.stages_config, stage_idx, num_channels, True,
-                self.freeze_norm, self.norm_decay)
-            setattr(self, 'stage{}'.format(stage_idx), stage)
-        self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
-                                        self.freeze_norm, self.norm_decay)
-
-    def _make_transition_layer(self,
-                               num_channels_pre_layer,
-                               num_channels_cur_layer,
-                               freeze_norm=False,
-                               norm_decay=0.):
-        num_branches_pre = len(num_channels_pre_layer)
-        num_branches_cur = len(num_channels_cur_layer)
-        transition_layers = []
-        for i in range(num_branches_cur):
-            if i < num_branches_pre:
-                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
-                    transition_layers.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                num_channels_pre_layer[i],
-                                num_channels_pre_layer[i],
-                                kernel_size=3,
-                                stride=1,
-                                padding=1,
-                                groups=num_channels_pre_layer[i],
-                                bias=False),
-                            nn.BatchNorm(num_channels_pre_layer[i]),
-                            L.Conv2d(
-                                num_channels_pre_layer[i],
-                                num_channels_cur_layer[i],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]),
-                            nn.ReLU()))
-                else:
-                    transition_layers.append(None)
-            else:
-                conv_downsamples = []
-                for j in range(i + 1 - num_branches_pre):
-                    conv_downsamples.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                num_channels_pre_layer[-1],
-                                num_channels_pre_layer[-1],
-                                groups=num_channels_pre_layer[-1],
-                                kernel_size=3,
-                                stride=2,
-                                padding=1,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_pre_layer[-1]),
-                            L.Conv2d(
-                                num_channels_pre_layer[-1],
-                                num_channels_cur_layer[i]
-                                if j == i - num_branches_pre else
-                                num_channels_pre_layer[-1],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]
-                                         if j == i - num_branches_pre else
-                                         num_channels_pre_layer[-1]),
-                            nn.ReLU()))
-                transition_layers.append(nn.Sequential(*conv_downsamples))
-        return nn.LayerList(transition_layers)
-
-    def _make_stage(self,
-                    stages_config,
-                    stage_idx,
-                    in_channels,
-                    multiscale_output,
-                    freeze_norm=False,
-                    norm_decay=0.):
-        num_modules = stages_config["num_modules"][stage_idx]
-        num_branches = stages_config["num_branches"][stage_idx]
-        num_blocks = stages_config["num_blocks"][stage_idx]
-        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
-        module_type = stages_config['module_type'][stage_idx]
-
-        modules = []
-        for i in range(num_modules):
-            if not multiscale_output and i == num_modules - 1:
-                reset_multiscale_output = False
-            else:
-                reset_multiscale_output = True
-            modules.append(
-                LiteHRNetModule(
-                    num_branches,
-                    num_blocks,
-                    in_channels,
-                    reduce_ratio,
-                    module_type,
-                    multiscale_output=reset_multiscale_output,
-                    with_fuse=True,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay))
-            in_channels = modules[-1].in_channels
-        return nn.Sequential(*modules), in_channels
-
-    def forward(self, inputs):
-        x = inputs['image']
-        x = self.stem(x)
-        y_list = [x]
-        for stage_idx in range(3):
-            x_list = []
-            transition = getattr(self, 'transition{}'.format(stage_idx))
-            for j in range(self.stages_config["num_branches"][stage_idx]):
-                if transition[j] is not None:
-                    if j >= len(y_list):
-                        x_list.append(transition[j](y_list[-1]))
-                    else:
-                        x_list.append(transition[j](y_list[j]))
-                else:
-                    x_list.append(y_list[j])
-            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
-        x = self.head_layer(y_list)
-        res = []
-        for i, layer in enumerate(x):
-            if i == self.freeze_at:
-                layer.stop_gradient = True
-            if i in self.return_idx:
-                res.append(layer)
-        return res
-
-    @property
-    def out_shape(self):
-        return [
-            ShapeSpec(
-                channels=self._out_channels[i], stride=self._out_strides[i])
-            for i in self.return_idx
-        ]

+ 20 - 6
paddlex/ppdet/modeling/backbones/mobilenet_v3.py

@@ -62,17 +62,21 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             groups=num_groups,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_weights"),
             bias_attr=False)
 
         norm_lr = 0. if freeze_norm else lr_mult
         param_attr = ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
+            name=name + "_bn_scale",
             trainable=False if freeze_norm else True)
         bias_attr = ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
+            name=name + "_bn_offset",
             trainable=False if freeze_norm else True)
         global_stats = True if freeze_norm else False
         if norm_type == 'sync_bn':
@@ -84,7 +88,9 @@ class ConvBNLayer(nn.Layer):
                 act=None,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                use_global_stats=global_stats)
+                use_global_stats=global_stats,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
         norm_params = self.bn.parameters()
         if freeze_norm:
             for param in norm_params:
@@ -197,9 +203,13 @@ class SEModule(nn.Layer):
             stride=1,
             padding=0,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_1_weights"),
             bias_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_1_offset"))
         self.conv2 = nn.Conv2D(
             in_channels=mid_channels,
             out_channels=channel,
@@ -207,9 +217,13 @@ class SEModule(nn.Layer):
             stride=1,
             padding=0,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_2_weights"),
             bias_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_2_offset"))
 
     def forward(self, inputs):
         outputs = self.avg_pool(inputs)

+ 0 - 262
paddlex/ppdet/modeling/backbones/shufflenet_v2.py

@@ -1,262 +0,0 @@
-# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-import paddle.nn as nn
-from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
-from paddle.nn.initializer import KaimingNormal
-
-from paddlex.ppdet.core.workspace import register, serializable
-from numbers import Integral
-from ..shape_spec import ShapeSpec
-from paddlex.ppdet.modeling.ops import channel_shuffle
-
-__all__ = ['ShuffleNetV2']
-
-
-class ConvBNLayer(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 act=None):
-        super(ConvBNLayer, self).__init__()
-        self._conv = Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            groups=groups,
-            weight_attr=ParamAttr(initializer=KaimingNormal()),
-            bias_attr=False)
-
-        self._batch_norm = BatchNorm(out_channels, act=act)
-
-    def forward(self, inputs):
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        return y
-
-
-class InvertedResidual(nn.Layer):
-    def __init__(self, in_channels, out_channels, stride, act="relu"):
-        super(InvertedResidual, self).__init__()
-        self._conv_pw = ConvBNLayer(
-            in_channels=in_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        self._conv_dw = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=out_channels // 2,
-            act=None)
-        self._conv_linear = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-
-    def forward(self, inputs):
-        x1, x2 = paddle.split(
-            inputs,
-            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
-            axis=1)
-        x2 = self._conv_pw(x2)
-        x2 = self._conv_dw(x2)
-        x2 = self._conv_linear(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        return channel_shuffle(out, 2)
-
-
-class InvertedResidualDS(nn.Layer):
-    def __init__(self, in_channels, out_channels, stride, act="relu"):
-        super(InvertedResidualDS, self).__init__()
-
-        # branch1
-        self._conv_dw_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=in_channels,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=in_channels,
-            act=None)
-        self._conv_linear_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        # branch2
-        self._conv_pw_2 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        self._conv_dw_2 = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=out_channels // 2,
-            act=None)
-        self._conv_linear_2 = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-
-    def forward(self, inputs):
-        x1 = self._conv_dw_1(inputs)
-        x1 = self._conv_linear_1(x1)
-        x2 = self._conv_pw_2(inputs)
-        x2 = self._conv_dw_2(x2)
-        x2 = self._conv_linear_2(x2)
-        out = paddle.concat([x1, x2], axis=1)
-
-        return channel_shuffle(out, 2)
-
-
-@register
-@serializable
-class ShuffleNetV2(nn.Layer):
-    def __init__(self,
-                 scale=1.0,
-                 act="relu",
-                 feature_maps=[5, 13, 17],
-                 with_last_conv=False):
-        super(ShuffleNetV2, self).__init__()
-        self.scale = scale
-        self.with_last_conv = with_last_conv
-        if isinstance(feature_maps, Integral):
-            feature_maps = [feature_maps]
-        self.feature_maps = feature_maps
-        stage_repeats = [4, 8, 4]
-
-        if scale == 0.25:
-            stage_out_channels = [-1, 24, 24, 48, 96, 512]
-        elif scale == 0.33:
-            stage_out_channels = [-1, 24, 32, 64, 128, 512]
-        elif scale == 0.5:
-            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
-        elif scale == 1.0:
-            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
-        elif scale == 1.5:
-            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
-        elif scale == 2.0:
-            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
-        else:
-            raise NotImplementedError("This scale size:[" + str(scale) +
-                                      "] is not implemented!")
-
-        self._out_channels = []
-        self._feature_idx = 0
-        # 1. conv1
-        self._conv1 = ConvBNLayer(
-            in_channels=3,
-            out_channels=stage_out_channels[1],
-            kernel_size=3,
-            stride=2,
-            padding=1,
-            act=act)
-        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
-        self._feature_idx += 1
-
-        # 2. bottleneck sequences
-        self._block_list = []
-        for stage_id, num_repeat in enumerate(stage_repeats):
-            for i in range(num_repeat):
-                if i == 0:
-                    block = self.add_sublayer(
-                        name=str(stage_id + 2) + '_' + str(i + 1),
-                        sublayer=InvertedResidualDS(
-                            in_channels=stage_out_channels[stage_id + 1],
-                            out_channels=stage_out_channels[stage_id + 2],
-                            stride=2,
-                            act=act))
-                else:
-                    block = self.add_sublayer(
-                        name=str(stage_id + 2) + '_' + str(i + 1),
-                        sublayer=InvertedResidual(
-                            in_channels=stage_out_channels[stage_id + 2],
-                            out_channels=stage_out_channels[stage_id + 2],
-                            stride=1,
-                            act=act))
-                self._block_list.append(block)
-                self._feature_idx += 1
-                self._update_out_channels(stage_out_channels[stage_id + 2],
-                                          self._feature_idx, self.feature_maps)
-
-        if self.with_last_conv:
-            # last_conv
-            self._last_conv = ConvBNLayer(
-                in_channels=stage_out_channels[-2],
-                out_channels=stage_out_channels[-1],
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                act=act)
-            self._feature_idx += 1
-            self._update_out_channels(stage_out_channels[-1],
-                                      self._feature_idx, self.feature_maps)
-
-    def _update_out_channels(self, channel, feature_idx, feature_maps):
-        if feature_idx in feature_maps:
-            self._out_channels.append(channel)
-
-    def forward(self, inputs):
-        y = self._conv1(inputs['image'])
-        y = self._max_pool(y)
-        outs = []
-        for i, inv in enumerate(self._block_list):
-            y = inv(y)
-            if i + 2 in self.feature_maps:
-                outs.append(y)
-
-        if self.with_last_conv:
-            y = self._last_conv(y)
-            outs.append(y)
-        return outs
-
-    @property
-    def out_shape(self):
-        return [ShapeSpec(channels=c) for c in self._out_channels]

+ 7 - 2
paddlex/ppdet/modeling/backbones/vgg.py

@@ -30,7 +30,9 @@ class ConvBlock(nn.Layer):
             out_channels=out_channels,
             kernel_size=3,
             stride=1,
-            padding=1)
+            padding=1,
+            weight_attr=ParamAttr(name=name + "1_weights"),
+            bias_attr=ParamAttr(name=name + "1_bias"))
         self.conv_out_list = []
         for i in range(1, groups):
             conv_out = self.add_sublayer(
@@ -40,7 +42,10 @@ class ConvBlock(nn.Layer):
                     out_channels=out_channels,
                     kernel_size=3,
                     stride=1,
-                    padding=1))
+                    padding=1,
+                    weight_attr=ParamAttr(
+                        name=name + "{}_weights".format(i + 1)),
+                    bias_attr=ParamAttr(name=name + "{}_bias".format(i + 1))))
             self.conv_out_list.append(conv_out)
 
         self.pool = MaxPool2D(

+ 1 - 45
paddlex/ppdet/modeling/bbox_utils.py

@@ -100,7 +100,7 @@ def clip_bbox(boxes, im_shape):
 def nonempty_bbox(boxes, min_size=0, return_mask=False):
     w = boxes[:, 2] - boxes[:, 0]
     h = boxes[:, 3] - boxes[:, 1]
-    mask = paddle.logical_and(h > min_size, w > min_size)
+    mask = paddle.logical_and(h > min_size, w > min_size)  # check height AND width; testing `w` twice ignored `h`
     if return_mask:
         return mask
     keep = paddle.nonzero(mask).flatten()
@@ -604,47 +604,3 @@ def bbox_iou_np_expand(box1, box2, x1y1x2y2=True, eps=1e-16):
 
     ious = inter_area / (b1_area + b2_area - inter_area + eps)
     return ious
-
-
-def bbox2distance(points, bbox, max_dis=None, eps=0.1):
-    """Decode bounding box based on distances.
-    Args:
-        points (Tensor): Shape (n, 2), [x, y].
-        bbox (Tensor): Shape (n, 4), "xyxy" format
-        max_dis (float): Upper bound of the distance.
-        eps (float): a small value to ensure target < max_dis, instead <=
-    Returns:
-        Tensor: Decoded distances.
-    """
-    left = points[:, 0] - bbox[:, 0]
-    top = points[:, 1] - bbox[:, 1]
-    right = bbox[:, 2] - points[:, 0]
-    bottom = bbox[:, 3] - points[:, 1]
-    if max_dis is not None:
-        left = left.clip(min=0, max=max_dis - eps)
-        top = top.clip(min=0, max=max_dis - eps)
-        right = right.clip(min=0, max=max_dis - eps)
-        bottom = bottom.clip(min=0, max=max_dis - eps)
-    return paddle.stack([left, top, right, bottom], -1)
-
-
-def distance2bbox(points, distance, max_shape=None):
-    """Decode distance prediction to bounding box.
-        Args:
-            points (Tensor): Shape (n, 2), [x, y].
-            distance (Tensor): Distance from the given point to 4
-                boundaries (left, top, right, bottom).
-            max_shape (tuple): Shape of the image.
-        Returns:
-            Tensor: Decoded bboxes.
-        """
-    x1 = points[:, 0] - distance[:, 0]
-    y1 = points[:, 1] - distance[:, 1]
-    x2 = points[:, 0] + distance[:, 2]
-    y2 = points[:, 1] + distance[:, 3]
-    if max_shape is not None:
-        x1 = x1.clip(min=0, max=max_shape[1])
-        y1 = y1.clip(min=0, max=max_shape[0])
-        x2 = x2.clip(min=0, max=max_shape[1])
-        y2 = y2.clip(min=0, max=max_shape[0])
-    return paddle.stack([x1, y1, x2, y2], -1)

+ 0 - 4
paddlex/ppdet/modeling/heads/__init__.py

@@ -25,8 +25,6 @@ from . import face_head
 from . import s2anet_head
 from . import keypoint_hrhrnet_head
 from . import centernet_head
-from . import gfl_head
-from . import pico_head
 from . import detr_head
 from . import sparsercnn_head
 
@@ -43,7 +41,5 @@ from .face_head import *
 from .s2anet_head import *
 from .keypoint_hrhrnet_head import *
 from .centernet_head import *
-from .gfl_head import *
-from .pico_head import *
 from .detr_head import *
 from .sparsercnn_head import *

+ 2 - 1
paddlex/ppdet/modeling/heads/centernet_head.py

@@ -98,7 +98,8 @@ class CenterNetHead(nn.Layer):
                 stride=1,
                 padding=0,
                 bias=True))
-        self.heatmap[2].conv.bias[:] = -2.19
+        with paddle.no_grad():
+            self.heatmap[2].conv.bias[:] = -2.19
         self.size = nn.Sequential(
             ConvLayer(
                 in_channels, head_planes, kernel_size=3, padding=1, bias=True),

+ 5 - 3
paddlex/ppdet/modeling/heads/detr_head.py

@@ -311,9 +311,11 @@ class DeformableDETRHead(nn.Layer):
         linear_init_(self.score_head)
         constant_(self.score_head.bias, -4.595)
         constant_(self.bbox_head.layers[-1].weight)
-        bias = paddle.zeros_like(self.bbox_head.layers[-1].bias)
-        bias[2:] = -2.0
-        self.bbox_head.layers[-1].bias.set_value(bias)
+
+        with paddle.no_grad():
+            bias = paddle.zeros_like(self.bbox_head.layers[-1].bias)
+            bias[2:] = -2.0
+            self.bbox_head.layers[-1].bias.set_value(bias)
 
     @classmethod
     def from_config(cls, cfg, hidden_dim, nhead, input_shape):

+ 19 - 8
paddlex/ppdet/modeling/heads/fcos_head.py

@@ -151,9 +151,12 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 stride=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
+                weight_attr=ParamAttr(
+                    name=conv_cls_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
                 bias_attr=ParamAttr(
+                    name=conv_cls_name + "_bias",
                     initializer=Constant(value=bias_init_value))))
 
         conv_reg_name = "fcos_head_reg"
@@ -165,9 +168,13 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 stride=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
+                weight_attr=ParamAttr(
+                    name=conv_reg_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
+                bias_attr=ParamAttr(
+                    name=conv_reg_name + "_bias",
+                    initializer=Constant(value=0))))
 
         conv_centerness_name = "fcos_head_centerness"
         self.fcos_head_centerness = self.add_sublayer(
@@ -178,9 +185,13 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 stride=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
+                weight_attr=ParamAttr(
+                    name=conv_centerness_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
+                bias_attr=ParamAttr(
+                    name=conv_centerness_name + "_bias",
+                    initializer=Constant(value=0))))
 
         self.scales_regs = []
         for i in range(len(self.fpn_stride)):

+ 0 - 476
paddlex/ppdet/modeling/heads/gfl_head.py

@@ -1,476 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Normal, Constant
-
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from paddlex.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
-from paddlex.ppdet.data.transform.atss_assigner import bbox_overlaps
-
-
-class ScaleReg(nn.Layer):
-    """
-    Parameter for scaling the regression outputs.
-    """
-
-    def __init__(self):
-        super(ScaleReg, self).__init__()
-        self.scale_reg = self.create_parameter(
-            shape=[1],
-            attr=ParamAttr(initializer=Constant(value=1.)),
-            dtype="float32")
-
-    def forward(self, inputs):
-        out = inputs * self.scale_reg
-        return out
-
-
-class Integral(nn.Layer):
-    """A fixed layer for calculating integral result from distribution.
-    This layer calculates the target location by :math: `sum{P(y_i) * y_i}`,
-    P(y_i) denotes the softmax vector that represents the discrete distribution
-    y_i denotes the discrete set, usually {0, 1, 2, ..., reg_max}
-
-    Args:
-        reg_max (int): The maximal value of the discrete set. Default: 16. You
-            may want to reset it according to your new dataset or related
-            settings.
-    """
-
-    def __init__(self, reg_max=16):
-        super(Integral, self).__init__()
-        self.reg_max = reg_max
-        self.register_buffer(
-            'project', paddle.linspace(0, self.reg_max, self.reg_max + 1))
-
-    def forward(self, x):
-        """Forward feature from the regression head to get integral result of
-        bounding box location.
-        Args:
-            x (Tensor): Features of the regression head, shape (N, 4*(n+1)),
-                n is self.reg_max.
-        Returns:
-            x (Tensor): Integral result of box locations, i.e., distance
-                offsets from the box center in four directions, shape (N, 4).
-        """
-        x = F.softmax(x.reshape([-1, self.reg_max + 1]), axis=1)
-        x = F.linear(x, self.project).reshape([-1, 4])
-        return x
-
-
-@register
-class DGQP(nn.Layer):
-    """Distribution-Guided Quality Predictor of GFocal head
-
-    Args:
-        reg_topk (int): top-k statistics of distribution to guide LQE
-        reg_channels (int): hidden layer unit to generate LQE
-        add_mean (bool): Whether to calculate the mean of top-k statistics
-    """
-
-    def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
-        super(DGQP, self).__init__()
-        self.reg_topk = reg_topk
-        self.reg_channels = reg_channels
-        self.add_mean = add_mean
-        self.total_dim = reg_topk
-        if add_mean:
-            self.total_dim += 1
-        self.reg_conv1 = self.add_sublayer(
-            'dgqp_reg_conv1',
-            nn.Conv2D(
-                in_channels=4 * self.total_dim,
-                out_channels=self.reg_channels,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-        self.reg_conv2 = self.add_sublayer(
-            'dgqp_reg_conv2',
-            nn.Conv2D(
-                in_channels=self.reg_channels,
-                out_channels=1,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-
-    def forward(self, x):
-        """Forward feature from the regression head to get a quality score
-        from the top-k statistics of the predicted distribution.
-        Args:
-            x (Tensor): Features of the regression head, shape
-                (N, 4*(n+1), H, W); GFLHead passes n = reg_max.
-        Returns:
-            y (Tensor): Sigmoid quality score computed from the top-k
-                (and, if add_mean, mean) probabilities, shape (N, 1, H, W).
-        """
-        N, _, H, W = x.shape[:]
-        prob = F.softmax(x.reshape([N, 4, -1, H, W]), axis=2)
-        prob_topk, _ = prob.topk(self.reg_topk, axis=2)
-        if self.add_mean:
-            stat = paddle.concat(
-                [prob_topk, prob_topk.mean(
-                    axis=2, keepdim=True)], axis=2)
-        else:
-            stat = prob_topk
-        y = F.relu(self.reg_conv1(stat.reshape([N, -1, H, W])))
-        y = F.sigmoid(self.reg_conv2(y))
-        return y
-
-
-@register
-class GFLHead(nn.Layer):
-    """
-    GFLHead
-    Args:
-        conv_feat (object): Instance of 'FCOSFeat'
-        num_classes (int): Number of classes
-        fpn_stride (list): The stride of each FPN Layer
-        prior_prob (float): Used to set the bias init for the class prediction layer
-        loss_qfl (object):
-        loss_dfl (object):
-        loss_bbox (object):
-        reg_max: Max value of integral set :math: `{0, ..., reg_max}`
-                in QFL setting. Default: 16.
-    """
-    __inject__ = [
-        'conv_feat', 'dgqp_module', 'loss_qfl', 'loss_dfl', 'loss_bbox', 'nms'
-    ]
-    __shared__ = ['num_classes']
-
-    def __init__(self,
-                 conv_feat='FCOSFeat',
-                 dgqp_module=None,
-                 num_classes=80,
-                 fpn_stride=[8, 16, 32, 64, 128],
-                 prior_prob=0.01,
-                 loss_qfl='QualityFocalLoss',
-                 loss_dfl='DistributionFocalLoss',
-                 loss_bbox='GIoULoss',
-                 reg_max=16,
-                 feat_in_chan=256,
-                 nms=None,
-                 nms_pre=1000,
-                 cell_offset=0):
-        super(GFLHead, self).__init__()
-        self.conv_feat = conv_feat
-        self.dgqp_module = dgqp_module
-        self.num_classes = num_classes
-        self.fpn_stride = fpn_stride
-        self.prior_prob = prior_prob
-        self.loss_qfl = loss_qfl
-        self.loss_dfl = loss_dfl
-        self.loss_bbox = loss_bbox
-        self.reg_max = reg_max
-        self.feat_in_chan = feat_in_chan
-        self.nms = nms
-        self.nms_pre = nms_pre
-        self.cell_offset = cell_offset
-        self.use_sigmoid = self.loss_qfl.use_sigmoid
-        if self.use_sigmoid:
-            self.cls_out_channels = self.num_classes
-        else:
-            self.cls_out_channels = self.num_classes + 1
-
-        conv_cls_name = "gfl_head_cls"
-        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
-        self.gfl_head_cls = self.add_sublayer(
-            conv_cls_name,
-            nn.Conv2D(
-                in_channels=self.feat_in_chan,
-                out_channels=self.cls_out_channels,
-                kernel_size=3,
-                stride=1,
-                padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(
-                    initializer=Constant(value=bias_init_value))))
-
-        conv_reg_name = "gfl_head_reg"
-        self.gfl_head_reg = self.add_sublayer(
-            conv_reg_name,
-            nn.Conv2D(
-                in_channels=self.feat_in_chan,
-                out_channels=4 * (self.reg_max + 1),
-                kernel_size=3,
-                stride=1,
-                padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-
-        self.scales_regs = []
-        for i in range(len(self.fpn_stride)):
-            lvl = int(math.log(int(self.fpn_stride[i]), 2))
-            feat_name = 'p{}_feat'.format(lvl)
-            scale_reg = self.add_sublayer(feat_name, ScaleReg())
-            self.scales_regs.append(scale_reg)
-
-        self.distribution_project = Integral(self.reg_max)
-
-    def forward(self, fpn_feats):
-        assert len(fpn_feats) == len(
-            self.fpn_stride
-        ), "The size of fpn_feats is not equal to size of fpn_stride"
-        cls_logits_list = []
-        bboxes_reg_list = []
-        for scale_reg, fpn_feat in zip(self.scales_regs, fpn_feats):
-            conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat)
-            cls_logits = self.gfl_head_cls(conv_cls_feat)
-            bbox_reg = scale_reg(self.gfl_head_reg(conv_reg_feat))
-            if self.dgqp_module:
-                quality_score = self.dgqp_module(bbox_reg)
-                cls_logits = F.sigmoid(cls_logits) * quality_score
-            cls_logits_list.append(cls_logits)
-            bboxes_reg_list.append(bbox_reg)
-
-        return (cls_logits_list, bboxes_reg_list)
-
-    def _images_to_levels(self, target, num_level_anchors):
-        """
-        Convert targets by image to targets by feature level.
-        """
-        level_targets = []
-        start = 0
-        for n in num_level_anchors:
-            end = start + n
-            level_targets.append(target[:, start:end].squeeze(0))
-            start = end
-        return level_targets
-
-    def _grid_cells_to_center(self, grid_cells):
-        """
-        Get center location of each grid cell
-        Args:
-            grid_cells: grid cells of a feature map
-        Returns:
-            center points
-        """
-        cells_cx = (grid_cells[:, 2] + grid_cells[:, 0]) / 2
-        cells_cy = (grid_cells[:, 3] + grid_cells[:, 1]) / 2
-        return paddle.stack([cells_cx, cells_cy], axis=-1)
-
-    def get_loss(self, gfl_head_outs, gt_meta):
-        cls_logits, bboxes_reg = gfl_head_outs
-        num_level_anchors = [
-            featmap.shape[-2] * featmap.shape[-1] for featmap in cls_logits
-        ]
-        grid_cells_list = self._images_to_levels(gt_meta['grid_cells'],
-                                                 num_level_anchors)
-        labels_list = self._images_to_levels(gt_meta['labels'],
-                                             num_level_anchors)
-        label_weights_list = self._images_to_levels(gt_meta['label_weights'],
-                                                    num_level_anchors)
-        bbox_targets_list = self._images_to_levels(gt_meta['bbox_targets'],
-                                                   num_level_anchors)
-        num_total_pos = sum(gt_meta['pos_num'])
-
-        loss_bbox_list, loss_dfl_list, loss_qfl_list, avg_factor = [], [], [], []
-        for cls_score, bbox_pred, grid_cells, labels, label_weights, bbox_targets, stride in zip(
-                cls_logits, bboxes_reg, grid_cells_list, labels_list,
-                label_weights_list, bbox_targets_list, self.fpn_stride):
-            grid_cells = grid_cells.reshape([-1, 4])
-            cls_score = cls_score.transpose([0, 2, 3, 1]).reshape(
-                [-1, self.cls_out_channels])
-            bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
-                [-1, 4 * (self.reg_max + 1)])
-            bbox_targets = bbox_targets.reshape([-1, 4])
-            labels = labels.reshape([-1])
-            label_weights = label_weights.reshape([-1])
-
-            bg_class_ind = self.num_classes
-            pos_inds = paddle.nonzero(
-                paddle.logical_and((labels >= 0), (labels < bg_class_ind)),
-                as_tuple=False).squeeze(1)
-            score = np.zeros(labels.shape)
-            if len(pos_inds) > 0:
-                pos_bbox_targets = paddle.gather(
-                    bbox_targets, pos_inds, axis=0)
-                pos_bbox_pred = paddle.gather(bbox_pred, pos_inds, axis=0)
-                pos_grid_cells = paddle.gather(grid_cells, pos_inds, axis=0)
-                pos_grid_cell_centers = self._grid_cells_to_center(
-                    pos_grid_cells) / stride
-
-                weight_targets = F.sigmoid(cls_score.detach())
-                weight_targets = paddle.gather(
-                    weight_targets.max(axis=1), pos_inds, axis=0)
-                pos_bbox_pred_corners = self.distribution_project(
-                    pos_bbox_pred)
-                pos_decode_bbox_pred = distance2bbox(pos_grid_cell_centers,
-                                                     pos_bbox_pred_corners)
-                pos_decode_bbox_targets = pos_bbox_targets / stride
-                bbox_iou = bbox_overlaps(
-                    pos_decode_bbox_pred.detach().numpy(),
-                    pos_decode_bbox_targets.detach().numpy(),
-                    is_aligned=True)
-                score[pos_inds.numpy()] = bbox_iou
-                pred_corners = pos_bbox_pred.reshape([-1, self.reg_max + 1])
-                target_corners = bbox2distance(pos_grid_cell_centers,
-                                               pos_decode_bbox_targets,
-                                               self.reg_max).reshape([-1])
-                # regression loss
-                loss_bbox = paddle.sum(
-                    self.loss_bbox(pos_decode_bbox_pred,
-                                   pos_decode_bbox_targets) *
-                    weight_targets.mean(axis=-1))
-
-                # dfl loss
-                loss_dfl = self.loss_dfl(
-                    pred_corners,
-                    target_corners,
-                    weight=weight_targets.unsqueeze(-1).expand(
-                        [-1, 4]).reshape([-1]),
-                    avg_factor=4.0)
-            else:
-                loss_bbox = bbox_pred.sum() * 0
-                loss_dfl = bbox_pred.sum() * 0
-                weight_targets = paddle.to_tensor([0])
-
-            # qfl loss
-            score = paddle.to_tensor(score)
-            loss_qfl = self.loss_qfl(
-                cls_score, (labels, score),
-                weight=label_weights,
-                avg_factor=num_total_pos)
-            loss_bbox_list.append(loss_bbox)
-            loss_dfl_list.append(loss_dfl)
-            loss_qfl_list.append(loss_qfl)
-            avg_factor.append(weight_targets.sum())
-
-        avg_factor = sum(avg_factor)
-        if avg_factor <= 0:
-            loss_qfl = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-            loss_bbox = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-            loss_dfl = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-        else:
-            losses_bbox = list(map(lambda x: x / avg_factor, loss_bbox_list))
-            losses_dfl = list(map(lambda x: x / avg_factor, loss_dfl_list))
-            loss_qfl = sum(loss_qfl_list)
-            loss_bbox = sum(losses_bbox)
-            loss_dfl = sum(losses_dfl)
-
-        loss_states = dict(
-            loss_qfl=loss_qfl, loss_bbox=loss_bbox, loss_dfl=loss_dfl)
-
-        return loss_states
-
-    def get_single_level_center_point(self,
-                                      featmap_size,
-                                      stride,
-                                      cell_offset=0):
-        """
-        Generate pixel centers of a single stage feature map.
-        Args:
-            featmap_size: height and width of the feature map
-            stride: down sample stride of the feature map
-        Returns:
-            y and x of the center points
-        """
-        h, w = featmap_size
-        x_range = (paddle.arange(w, dtype='float32') + cell_offset) * stride
-        y_range = (paddle.arange(h, dtype='float32') + cell_offset) * stride
-        y, x = paddle.meshgrid(y_range, x_range)
-        y = y.flatten()
-        x = x.flatten()
-        return y, x
-
-    def get_bboxes_single(self,
-                          cls_scores,
-                          bbox_preds,
-                          img_shape,
-                          scale_factor,
-                          rescale=True,
-                          cell_offset=0):
-        assert len(cls_scores) == len(bbox_preds)
-        mlvl_bboxes = []
-        mlvl_scores = []
-        for stride, cls_score, bbox_pred in zip(self.fpn_stride, cls_scores,
-                                                bbox_preds):
-            featmap_size = cls_score.shape[-2:]
-            y, x = self.get_single_level_center_point(
-                featmap_size, stride, cell_offset=cell_offset)
-            center_points = paddle.stack([x, y], axis=-1)
-            scores = F.sigmoid(
-                cls_score.transpose([1, 2, 0]).reshape(
-                    [-1, self.cls_out_channels]))
-            bbox_pred = bbox_pred.transpose([1, 2, 0])
-            bbox_pred = self.distribution_project(bbox_pred) * stride
-
-            if scores.shape[0] > self.nms_pre:
-                max_scores = scores.max(axis=1)
-                _, topk_inds = max_scores.topk(self.nms_pre)
-                center_points = center_points.gather(topk_inds)
-                bbox_pred = bbox_pred.gather(topk_inds)
-                scores = scores.gather(topk_inds)
-
-            bboxes = distance2bbox(
-                center_points, bbox_pred, max_shape=img_shape)
-            mlvl_bboxes.append(bboxes)
-            mlvl_scores.append(scores)
-        mlvl_bboxes = paddle.concat(mlvl_bboxes)
-        if rescale:
-            # [h_scale, w_scale] to [w_scale, h_scale, w_scale, h_scale]
-            im_scale = paddle.concat([scale_factor[::-1], scale_factor[::-1]])
-            mlvl_bboxes /= im_scale
-        mlvl_scores = paddle.concat(mlvl_scores)
-        if self.use_sigmoid:
-            # add a dummy background class to the backend when use_sigmoid
-            padding = paddle.zeros([mlvl_scores.shape[0], 1])
-            mlvl_scores = paddle.concat([mlvl_scores, padding], axis=1)
-        mlvl_scores = mlvl_scores.transpose([1, 0])
-        return mlvl_bboxes, mlvl_scores
-
-    def decode(self, cls_scores, bbox_preds, im_shape, scale_factor,
-               cell_offset):
-        batch_bboxes = []
-        batch_scores = []
-        for img_id in range(cls_scores[0].shape[0]):
-            num_levels = len(cls_scores)
-            cls_score_list = [cls_scores[i][img_id] for i in range(num_levels)]
-            bbox_pred_list = [bbox_preds[i][img_id] for i in range(num_levels)]
-            bboxes, scores = self.get_bboxes_single(
-                cls_score_list,
-                bbox_pred_list,
-                im_shape[img_id],
-                scale_factor[img_id],
-                cell_offset=cell_offset)
-            batch_bboxes.append(bboxes)
-            batch_scores.append(scores)
-        batch_bboxes = paddle.stack(batch_bboxes, axis=0)
-        batch_scores = paddle.stack(batch_scores, axis=0)
-
-        return batch_bboxes, batch_scores
-
-    def post_process(self, gfl_head_outs, im_shape, scale_factor):
-        cls_scores, bboxes_reg = gfl_head_outs
-        bboxes, score = self.decode(cls_scores, bboxes_reg, im_shape,
-                                    scale_factor, self.cell_offset)
-        bbox_pred, bbox_num, _ = self.nms(bboxes, score)
-        return bbox_pred, bbox_num

+ 0 - 329
paddlex/ppdet/modeling/heads/pico_head.py

@@ -1,329 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Normal, Constant
-
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from paddlex.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
-from paddlex.ppdet.data.transform.atss_assigner import bbox_overlaps
-from .gfl_head import GFLHead
-
-
-@register
-class PicoFeat(nn.Layer):
-    """
-    PicoFeat of PicoDet
-
-    Args:
-        feat_in (int): The channel number of input Tensor.
-        feat_out (int): The channel number of output Tensor.
-        num_convs (int): The convolution number of the PicoFeat.
-        norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
-    """
-
-    def __init__(self,
-                 feat_in=256,
-                 feat_out=96,
-                 num_fpn_stride=3,
-                 num_convs=2,
-                 norm_type='bn',
-                 share_cls_reg=False):
-        super(PicoFeat, self).__init__()
-        self.num_convs = num_convs
-        self.norm_type = norm_type
-        self.share_cls_reg = share_cls_reg
-        self.cls_convs = []
-        self.reg_convs = []
-        for stage_idx in range(num_fpn_stride):
-            cls_subnet_convs = []
-            reg_subnet_convs = []
-            for i in range(self.num_convs):
-                in_c = feat_in if i == 0 else feat_out
-                cls_conv_dw = self.add_sublayer(
-                    'cls_conv_dw{}.{}'.format(stage_idx, i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=feat_out,
-                        filter_size=3,
-                        stride=1,
-                        groups=feat_out,
-                        norm_type=norm_type,
-                        bias_on=False,
-                        lr_scale=2.))
-                cls_subnet_convs.append(cls_conv_dw)
-                cls_conv_pw = self.add_sublayer(
-                    'cls_conv_pw{}.{}'.format(stage_idx, i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=feat_out,
-                        filter_size=1,
-                        stride=1,
-                        norm_type=norm_type,
-                        bias_on=False,
-                        lr_scale=2.))
-                cls_subnet_convs.append(cls_conv_pw)
-
-                if not self.share_cls_reg:
-                    reg_conv_dw = self.add_sublayer(
-                        'reg_conv_dw{}.{}'.format(stage_idx, i),
-                        ConvNormLayer(
-                            ch_in=in_c,
-                            ch_out=feat_out,
-                            filter_size=3,
-                            stride=1,
-                            groups=feat_out,
-                            norm_type=norm_type,
-                            bias_on=False,
-                            lr_scale=2.))
-                    reg_subnet_convs.append(reg_conv_dw)
-                    reg_conv_pw = self.add_sublayer(
-                        'reg_conv_pw{}.{}'.format(stage_idx, i),
-                        ConvNormLayer(
-                            ch_in=in_c,
-                            ch_out=feat_out,
-                            filter_size=1,
-                            stride=1,
-                            norm_type=norm_type,
-                            bias_on=False,
-                            lr_scale=2.))
-                    reg_subnet_convs.append(reg_conv_pw)
-            self.cls_convs.append(cls_subnet_convs)
-            self.reg_convs.append(reg_subnet_convs)
-
-    def forward(self, fpn_feat, stage_idx):
-        assert stage_idx < len(self.cls_convs)
-        cls_feat = fpn_feat
-        reg_feat = fpn_feat
-        for i in range(len(self.cls_convs[stage_idx])):
-            cls_feat = F.leaky_relu(self.cls_convs[stage_idx][i](cls_feat),
-                                    0.1)
-            if not self.share_cls_reg:
-                reg_feat = F.leaky_relu(self.reg_convs[stage_idx][i](reg_feat),
-                                        0.1)
-        return cls_feat, reg_feat
-
-
-@register
-class PicoHead(GFLHead):
-    """
-    PicoHead
-    Args:
-        conv_feat (object): Instance of 'PicoFeat'
-        num_classes (int): Number of classes
-        fpn_stride (list): The stride of each FPN Layer
-        prior_prob (float): Used to set the bias init for the class prediction layer
-        loss_qfl (object):
-        loss_dfl (object):
-        loss_bbox (object):
-        reg_max: Max value of integral set :math: `{0, ..., reg_max}`
-                in QFL setting. Default: 16.
-    """
-    __inject__ = [
-        'conv_feat', 'dgqp_module', 'loss_qfl', 'loss_dfl', 'loss_bbox', 'nms'
-    ]
-    __shared__ = ['num_classes']
-
-    def __init__(self,
-                 conv_feat='PicoFeat',
-                 dgqp_module=None,
-                 num_classes=80,
-                 fpn_stride=[8, 16, 32],
-                 prior_prob=0.01,
-                 loss_qfl='QualityFocalLoss',
-                 loss_dfl='DistributionFocalLoss',
-                 loss_bbox='GIoULoss',
-                 reg_max=16,
-                 feat_in_chan=96,
-                 nms=None,
-                 nms_pre=1000,
-                 cell_offset=0):
-        super(PicoHead, self).__init__(
-            conv_feat=conv_feat,
-            dgqp_module=dgqp_module,
-            num_classes=num_classes,
-            fpn_stride=fpn_stride,
-            prior_prob=prior_prob,
-            loss_qfl=loss_qfl,
-            loss_dfl=loss_dfl,
-            loss_bbox=loss_bbox,
-            reg_max=reg_max,
-            feat_in_chan=feat_in_chan,
-            nms=nms,
-            nms_pre=nms_pre,
-            cell_offset=cell_offset)
-        self.conv_feat = conv_feat
-        self.num_classes = num_classes
-        self.fpn_stride = fpn_stride
-        self.prior_prob = prior_prob
-        self.loss_qfl = loss_qfl
-        self.loss_dfl = loss_dfl
-        self.loss_bbox = loss_bbox
-        self.reg_max = reg_max
-        self.feat_in_chan = feat_in_chan
-        self.nms = nms
-        self.nms_pre = nms_pre
-        self.cell_offset = cell_offset
-        self.use_sigmoid = self.loss_qfl.use_sigmoid
-        if self.use_sigmoid:
-            self.cls_out_channels = self.num_classes
-        else:
-            self.cls_out_channels = self.num_classes + 1
-        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
-        # Clear the super class initialization
-        self.gfl_head_cls = None
-        self.gfl_head_reg = None
-        self.scales_regs = None
-
-        self.head_cls_list = []
-        self.head_reg_list = []
-        for i in range(len(fpn_stride)):
-            head_cls = self.add_sublayer(
-                "head_cls" + str(i),
-                nn.Conv2D(
-                    in_channels=self.feat_in_chan,
-                    out_channels=self.cls_out_channels + 4 * (self.reg_max + 1)
-                    if self.conv_feat.share_cls_reg else self.cls_out_channels,
-                    kernel_size=1,
-                    stride=1,
-                    padding=0,
-                    weight_attr=ParamAttr(initializer=Normal(
-                        mean=0., std=0.01)),
-                    bias_attr=ParamAttr(
-                        initializer=Constant(value=bias_init_value))))
-            self.head_cls_list.append(head_cls)
-            if not self.conv_feat.share_cls_reg:
-                head_reg = self.add_sublayer(
-                    "head_reg" + str(i),
-                    nn.Conv2D(
-                        in_channels=self.feat_in_chan,
-                        out_channels=4 * (self.reg_max + 1),
-                        kernel_size=1,
-                        stride=1,
-                        padding=0,
-                        weight_attr=ParamAttr(initializer=Normal(
-                            mean=0., std=0.01)),
-                        bias_attr=ParamAttr(initializer=Constant(value=0))))
-                self.head_reg_list.append(head_reg)
-
-    def forward(self, fpn_feats):
-        assert len(fpn_feats) == len(
-            self.fpn_stride
-        ), "The size of fpn_feats is not equal to size of fpn_stride"
-        cls_logits_list = []
-        bboxes_reg_list = []
-        for i, fpn_feat in enumerate(fpn_feats):
-            conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat, i)
-            if self.conv_feat.share_cls_reg:
-                cls_logits = self.head_cls_list[i](conv_cls_feat)
-                cls_score, bbox_pred = paddle.split(
-                    cls_logits,
-                    [self.cls_out_channels, 4 * (self.reg_max + 1)],
-                    axis=1)
-            else:
-                cls_score = self.head_cls_list[i](conv_cls_feat)
-                bbox_pred = self.head_reg_list[i](conv_reg_feat)
-            if self.dgqp_module:
-                quality_score = self.dgqp_module(bbox_pred)
-                cls_score = F.sigmoid(cls_score) * quality_score
-
-            if not self.training:
-                cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
-                bbox_pred = self.distribution_project(
-                    bbox_pred.transpose([0, 2, 3, 1])) * self.fpn_stride[i]
-
-            cls_logits_list.append(cls_score)
-            bboxes_reg_list.append(bbox_pred)
-
-        return (cls_logits_list, bboxes_reg_list)
-
-    def get_bboxes_single(self,
-                          cls_scores,
-                          bbox_preds,
-                          img_shape,
-                          scale_factor,
-                          rescale=True,
-                          cell_offset=0):
-        assert len(cls_scores) == len(bbox_preds)
-        mlvl_bboxes = []
-        mlvl_scores = []
-        for stride, cls_score, bbox_pred in zip(self.fpn_stride, cls_scores,
-                                                bbox_preds):
-            featmap_size = cls_score.shape[0:2]
-            y, x = self.get_single_level_center_point(
-                featmap_size, stride, cell_offset=cell_offset)
-            center_points = paddle.stack([x, y], axis=-1)
-            scores = cls_score.reshape([-1, self.cls_out_channels])
-
-            if scores.shape[0] > self.nms_pre:
-                max_scores = scores.max(axis=1)
-                _, topk_inds = max_scores.topk(self.nms_pre)
-                center_points = center_points.gather(topk_inds)
-                bbox_pred = bbox_pred.gather(topk_inds)
-                scores = scores.gather(topk_inds)
-
-            bboxes = distance2bbox(
-                center_points, bbox_pred, max_shape=img_shape)
-            mlvl_bboxes.append(bboxes)
-            mlvl_scores.append(scores)
-        mlvl_bboxes = paddle.concat(mlvl_bboxes)
-        if rescale:
-            # [h_scale, w_scale] to [w_scale, h_scale, w_scale, h_scale]
-            im_scale = paddle.concat([scale_factor[::-1], scale_factor[::-1]])
-            mlvl_bboxes /= im_scale
-        mlvl_scores = paddle.concat(mlvl_scores)
-        mlvl_scores = mlvl_scores.transpose([1, 0])
-        return mlvl_bboxes, mlvl_scores
-
-    def decode(self, cls_scores, bbox_preds, im_shape, scale_factor,
-               cell_offset):
-        batch_bboxes = []
-        batch_scores = []
-        batch_size = cls_scores[0].shape[0]
-        for img_id in range(batch_size):
-            num_levels = len(cls_scores)
-            cls_score_list = [cls_scores[i][img_id] for i in range(num_levels)]
-            bbox_pred_list = [
-                bbox_preds[i].reshape([batch_size, -1, 4])[img_id]
-                for i in range(num_levels)
-            ]
-            bboxes, scores = self.get_bboxes_single(
-                cls_score_list,
-                bbox_pred_list,
-                im_shape[img_id],
-                scale_factor[img_id],
-                cell_offset=cell_offset)
-            batch_bboxes.append(bboxes)
-            batch_scores.append(scores)
-        batch_bboxes = paddle.stack(batch_bboxes, axis=0)
-        batch_scores = paddle.stack(batch_scores, axis=0)
-
-        return batch_bboxes, batch_scores
-
-    def post_process(self, gfl_head_outs, im_shape, scale_factor):
-        cls_scores, bboxes_reg = gfl_head_outs
-        bboxes, score = self.decode(cls_scores, bboxes_reg, im_shape,
-                                    scale_factor, self.cell_offset)
-        bbox_pred, bbox_num, _ = self.nms(bboxes, score)
-        return bbox_pred, bbox_num

+ 0 - 2
paddlex/ppdet/modeling/losses/__init__.py

@@ -22,7 +22,6 @@ from . import ctfocal_loss
 from . import keypoint_loss
 from . import jde_loss
 from . import fairmot_loss
-from . import gfocal_loss
 from . import detr_loss
 from . import sparsercnn_loss
 
@@ -36,6 +35,5 @@ from .ctfocal_loss import *
 from .keypoint_loss import *
 from .jde_loss import *
 from .fairmot_loss import *
-from .gfocal_loss import *
 from .detr_loss import *
 from .sparsercnn_loss import *

+ 0 - 214
paddlex/ppdet/modeling/losses/gfocal_loss.py

@@ -1,214 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddlex.ppdet.core.workspace import register, serializable
-from paddlex.ppdet.modeling import ops
-
-__all__ = ['QualityFocalLoss', 'DistributionFocalLoss']
-
-
-def quality_focal_loss(pred, target, beta=2.0, use_sigmoid=True):
-    """
-    Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
-    Qualified and Distributed Bounding Boxes for Dense Object Detection
-    <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        pred (Tensor): Predicted joint representation of classification
-            and quality (IoU) estimation with shape (N, C), C is the number of
-            classes.
-        target (tuple([Tensor])): Target category label with shape (N,)
-            and target quality label with shape (N,).
-        beta (float): The beta parameter for calculating the modulating factor.
-            Defaults to 2.0.
-    Returns:
-        Tensor: Loss tensor with shape (N,).
-    """
-    assert len(target) == 2, """target for QFL must be a tuple of two elements,
-        including category label and quality label, respectively"""
-    # label denotes the category id, score denotes the quality score
-    label, score = target
-    if use_sigmoid:
-        func = F.binary_cross_entropy_with_logits
-    else:
-        func = F.binary_cross_entropy
-
-    # negatives are supervised by 0 quality score
-    pred_sigmoid = F.sigmoid(pred) if use_sigmoid else pred
-    scale_factor = pred_sigmoid
-    zerolabel = paddle.zeros(pred.shape, dtype='float32')
-    loss = func(pred, zerolabel, reduction='none') * scale_factor.pow(beta)
-
-    # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
-    bg_class_ind = pred.shape[1]
-    pos = paddle.logical_and((label >= 0),
-                             (label < bg_class_ind)).nonzero().squeeze(1)
-    if pos.shape[0] == 0:
-        return loss.sum(axis=1)
-    pos_label = paddle.gather(label, pos, axis=0)
-    pos_mask = np.zeros(pred.shape, dtype=np.int32)
-    pos_mask[pos.numpy(), pos_label.numpy()] = 1
-    pos_mask = paddle.to_tensor(pos_mask, dtype='bool')
-    score = score.unsqueeze(-1).expand([-1, pred.shape[1]]).cast('float32')
-    # positives are supervised by bbox quality (IoU) score
-    scale_factor_new = score - pred_sigmoid
-
-    loss_pos = func(
-        pred, score, reduction='none') * scale_factor_new.abs().pow(beta)
-    loss = loss * paddle.logical_not(pos_mask) + loss_pos * pos_mask
-    loss = loss.sum(axis=1)
-    return loss
-
-
-def distribution_focal_loss(pred, label):
-    """Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning
-    Qualified and Distributed Bounding Boxes for Dense Object Detection
-    <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        pred (Tensor): Predicted general distribution of bounding boxes
-            (before softmax) with shape (N, n+1), n is the max value of the
-            integral set `{0, ..., n}` in paper.
-        label (Tensor): Target distance label for bounding boxes with
-            shape (N,).
-    Returns:
-        Tensor: Loss tensor with shape (N,).
-    """
-    dis_left = label.cast('int64')
-    dis_right = dis_left + 1
-    weight_left = dis_right.cast('float32') - label
-    weight_right = label - dis_left.cast('float32')
-    loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \
-        + F.cross_entropy(pred, dis_right, reduction='none') * weight_right
-    return loss
-
-
-@register
-@serializable
-class QualityFocalLoss(nn.Layer):
-    r"""Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:
-    Learning Qualified and Distributed Bounding Boxes for Dense Object
-    Detection <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.
-            Defaults to True.
-        beta (float): The beta parameter for calculating the modulating factor.
-            Defaults to 2.0.
-        reduction (str): Options are "none", "mean" and "sum".
-        loss_weight (float): Loss weight of current loss.
-    """
-
-    def __init__(self,
-                 use_sigmoid=True,
-                 beta=2.0,
-                 reduction='mean',
-                 loss_weight=1.0):
-        super(QualityFocalLoss, self).__init__()
-        self.use_sigmoid = use_sigmoid
-        self.beta = beta
-        assert reduction in ('none', 'mean', 'sum')
-        self.reduction = reduction
-        self.loss_weight = loss_weight
-
-    def forward(self, pred, target, weight=None, avg_factor=None):
-        """Forward function.
-        Args:
-            pred (Tensor): Predicted joint representation of
-                classification and quality (IoU) estimation with shape (N, C),
-                C is the number of classes.
-            target (tuple([Tensor])): Target category label with shape
-                (N,) and target quality label with shape (N,).
-            weight (Tensor, optional): The weight of loss for each
-                prediction. Defaults to None.
-            avg_factor (int, optional): Average factor that is used to average
-                the loss. Defaults to None.
-        """
-
-        loss = self.loss_weight * quality_focal_loss(
-            pred, target, beta=self.beta, use_sigmoid=self.use_sigmoid)
-
-        if weight is not None:
-            loss = loss * weight
-        if avg_factor is None:
-            if self.reduction == 'none':
-                return loss
-            elif self.reduction == 'mean':
-                return loss.mean()
-            elif self.reduction == 'sum':
-                return loss.sum()
-        else:
-            # if reduction is mean, then average the loss by avg_factor
-            if self.reduction == 'mean':
-                loss = loss.sum() / avg_factor
-            # if reduction is 'none', then do nothing, otherwise raise an error
-            elif self.reduction != 'none':
-                raise ValueError(
-                    'avg_factor can not be used with reduction="sum"')
-        return loss
-
-
-@register
-@serializable
-class DistributionFocalLoss(nn.Layer):
-    """Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:
-    Learning Qualified and Distributed Bounding Boxes for Dense Object
-    Detection <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
-        loss_weight (float): Loss weight of current loss.
-    """
-
-    def __init__(self, reduction='mean', loss_weight=1.0):
-        super(DistributionFocalLoss, self).__init__()
-        assert reduction in ('none', 'mean', 'sum')
-        self.reduction = reduction
-        self.loss_weight = loss_weight
-
-    def forward(self, pred, target, weight=None, avg_factor=None):
-        """Forward function.
-        Args:
-            pred (Tensor): Predicted general distribution of bounding
-                boxes (before softmax) with shape (N, n+1), n is the max value
-                of the integral set `{0, ..., n}` in paper.
-            target (Tensor): Target distance label for bounding boxes
-                with shape (N,).
-            weight (Tensor, optional): The weight of loss for each
-                prediction. Defaults to None.
-            avg_factor (int, optional): Average factor that is used to average
-                the loss. Defaults to None.
-        """
-        loss = self.loss_weight * distribution_focal_loss(pred, target)
-        if weight is not None:
-            loss = loss * weight
-        if avg_factor is None:
-            if self.reduction == 'none':
-                return loss
-            elif self.reduction == 'mean':
-                return loss.mean()
-            elif self.reduction == 'sum':
-                return loss.sum()
-        else:
-            # if reduction is mean, then average the loss by avg_factor
-            if self.reduction == 'mean':
-                loss = loss.sum() / avg_factor
-            # if reduction is 'none', then do nothing, otherwise raise an error
-            elif self.reduction != 'none':
-                raise ValueError(
-                    'avg_factor can not be used with reduction="sum"')
-        return loss

+ 4 - 5
paddlex/ppdet/modeling/losses/keypoint_loss.py

@@ -29,7 +29,7 @@ __all__ = ['HrHRNetLoss', 'KeyPointMSELoss']
 @register
 @serializable
 class KeyPointMSELoss(nn.Layer):
-    def __init__(self, use_target_weight=True, loss_scale=0.5):
+    def __init__(self, use_target_weight=True):
         """
         KeyPointMSELoss layer
 
@@ -39,7 +39,6 @@ class KeyPointMSELoss(nn.Layer):
         super(KeyPointMSELoss, self).__init__()
         self.criterion = nn.MSELoss(reduction='mean')
         self.use_target_weight = use_target_weight
-        self.loss_scale = loss_scale
 
     def forward(self, output, records):
         target = records['target']
@@ -51,16 +50,16 @@ class KeyPointMSELoss(nn.Layer):
         heatmaps_gt = target.reshape(
             (batch_size, num_joints, -1)).split(num_joints, 1)
         loss = 0
+
         for idx in range(num_joints):
             heatmap_pred = heatmaps_pred[idx].squeeze()
             heatmap_gt = heatmaps_gt[idx].squeeze()
             if self.use_target_weight:
-                loss += self.loss_scale * self.criterion(
+                loss += 0.5 * self.criterion(
                     heatmap_pred.multiply(target_weight[:, idx]),
                     heatmap_gt.multiply(target_weight[:, idx]))
             else:
-                loss += self.loss_scale * self.criterion(heatmap_pred,
-                                                         heatmap_gt)
+                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
         keypoint_losses = dict()
         keypoint_losses['loss'] = loss / num_joints
         return keypoint_losses

+ 0 - 2
paddlex/ppdet/modeling/necks/__init__.py

@@ -17,7 +17,6 @@ from . import yolo_fpn
 from . import hrfpn
 from . import ttf_fpn
 from . import centernet_fpn
-from . import pan
 
 from .fpn import *
 from .yolo_fpn import *
@@ -25,4 +24,3 @@ from .hrfpn import *
 from .ttf_fpn import *
 from .centernet_fpn import *
 from .blazeface_fpn import *
-from .pan import *

+ 14 - 3
paddlex/ppdet/modeling/necks/blazeface_fpn.py

@@ -51,14 +51,25 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             groups=num_groups,
             weight_attr=ParamAttr(
-                learning_rate=conv_lr, initializer=KaimingNormal()),
+                learning_rate=conv_lr,
+                initializer=KaimingNormal(),
+                name=name + "_weights"),
             bias_attr=False)
 
+        param_attr = ParamAttr(name=name + "_bn_scale")
+        bias_attr = ParamAttr(name=name + "_bn_offset")
         if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
+            self._batch_norm = nn.SyncBatchNorm(
+                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
         else:
             self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+                out_channels,
+                act=None,
+                param_attr=param_attr,
+                bias_attr=bias_attr,
+                use_global_stats=False,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
 
     def forward(self, x):
         x = self._conv(x)

+ 4 - 0
paddlex/ppdet/modeling/necks/hrfpn.py

@@ -14,6 +14,7 @@
 
 import paddle
 import paddle.nn.functional as F
+from paddle import ParamAttr
 import paddle.nn as nn
 from paddlex.ppdet.core.workspace import register
 from ..shape_spec import ShapeSpec
@@ -52,6 +53,7 @@ class HRFPN(nn.Layer):
             in_channels=in_channel,
             out_channels=out_channel,
             kernel_size=1,
+            weight_attr=ParamAttr(name='hrfpn_reduction_weights'),
             bias_attr=False)
 
         if share_conv:
@@ -60,6 +62,7 @@ class HRFPN(nn.Layer):
                 out_channels=out_channel,
                 kernel_size=3,
                 padding=1,
+                weight_attr=ParamAttr(name='fpn_conv_weights'),
                 bias_attr=False)
         else:
             self.fpn_conv = []
@@ -72,6 +75,7 @@ class HRFPN(nn.Layer):
                         out_channels=out_channel,
                         kernel_size=3,
                         padding=1,
+                        weight_attr=ParamAttr(name=conv_name + "_weights"),
                         bias_attr=False))
                 self.fpn_conv.append(conv)
 

+ 0 - 135
paddlex/ppdet/modeling/necks/pan.py

@@ -1,135 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import XavierUniform
-from paddle.regularizer import L2Decay
-from paddlex.ppdet.core.workspace import register, serializable
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from ..shape_spec import ShapeSpec
-
-__all__ = ['PAN']
-
-
-@register
-@serializable
-class PAN(nn.Layer):
-    """
-    Path Aggregation Network, see https://arxiv.org/abs/1803.01534
-
-    Args:
-        in_channels (list[int]): input channels of each level which can be
-            derived from the output shape of backbone by from_config
-        out_channel (list[int]): output channel of each level
-        spatial_scales (list[float]): the spatial scales between input feature
-            maps and original input image which can be derived from the output
-            shape of backbone by from_config
-        has_extra_convs (bool): whether to add extra conv to the last level.
-            default False
-        extra_stage (int): the number of extra stages added to the last level.
-            default 1
-        use_c5 (bool): Whether to use c5 as the input of extra stage,
-            otherwise p5 is used. default True
-        norm_type (string|None): The normalization type in FPN module. If
-            norm_type is None, norm will not be used after conv and if
-            norm_type is string, bn, gn, sync_bn are available. default None
-        norm_decay (float): weight decay for normalization layer weights.
-            default 0.
-        freeze_norm (bool): whether to freeze normalization layer.
-            default False
-        relu_before_extra_convs (bool): whether to add relu before extra convs.
-            default False
-    """
-
-    def __init__(self,
-                 in_channels,
-                 out_channel,
-                 spatial_scales=[0.125, 0.0625, 0.03125],
-                 start_level=0,
-                 end_level=-1,
-                 norm_type=None):
-        super(PAN, self).__init__()
-        self.out_channel = out_channel
-        self.num_ins = len(in_channels)
-        self.spatial_scales = spatial_scales
-        if end_level == -1:
-            self.end_level = self.num_ins
-        else:
-            # if end_level < inputs, no extra level is allowed
-            self.end_level = end_level
-            assert end_level <= len(in_channels)
-        self.start_level = start_level
-        self.norm_type = norm_type
-        self.lateral_convs = []
-
-        for i in range(self.start_level, self.end_level):
-            in_c = in_channels[i - self.start_level]
-            if self.norm_type is not None:
-                lateral = self.add_sublayer(
-                    'pan_lateral' + str(i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=self.out_channel,
-                        filter_size=1,
-                        stride=1,
-                        norm_type=self.norm_type,
-                        norm_decay=self.norm_decay,
-                        freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=in_c)))
-            else:
-                lateral = self.add_sublayer(
-                    'pan_lateral' + str(i),
-                    nn.Conv2D(
-                        in_channels=in_c,
-                        out_channels=self.out_channel,
-                        kernel_size=1,
-                        weight_attr=ParamAttr(
-                            initializer=XavierUniform(fan_out=in_c))))
-            self.lateral_convs.append(lateral)
-
-    @classmethod
-    def from_config(cls, cfg, input_shape):
-        return {'in_channels': [i.channels for i in input_shape], }
-
-    def forward(self, body_feats):
-        laterals = []
-        for i, lateral_conv in enumerate(self.lateral_convs):
-            laterals.append(lateral_conv(body_feats[i + self.start_level]))
-        num_levels = len(laterals)
-        for i in range(1, num_levels):
-            lvl = num_levels - i
-            upsample = F.interpolate(
-                laterals[lvl],
-                scale_factor=2.,
-                mode='bilinear', )
-            laterals[lvl - 1] += upsample
-
-        outs = [laterals[i] for i in range(num_levels)]
-        for i in range(0, num_levels - 1):
-            outs[i + 1] += F.interpolate(
-                outs[i], scale_factor=0.5, mode='bilinear')
-
-        return outs
-
-    @property
-    def out_shape(self):
-        return [
-            ShapeSpec(
-                channels=self.out_channel, stride=1. / s)
-            for s in self.spatial_scales
-        ]

+ 0 - 12
paddlex/ppdet/modeling/ops.py

@@ -1592,15 +1592,3 @@ def smooth_l1(input,
     out = paddle.reshape(out, shape=[out.shape[0], -1])
     out = paddle.sum(out, axis=1)
     return out
-
-
-def channel_shuffle(x, groups):
-    batch_size, num_channels, height, width = x.shape[0:4]
-    assert (num_channels % groups == 0,
-            'num_channels should be divisible by groups')
-    channels_per_group = num_channels // groups
-    x = paddle.reshape(
-        x=x, shape=[batch_size, groups, channels_per_group, height, width])
-    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
-    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
-    return x

+ 3 - 1
paddlex/ppdet/modeling/reid/jde_embedding_head.py

@@ -92,7 +92,9 @@ class JDEEmbeddingHead(nn.Layer):
                     kernel_size=3,
                     stride=1,
                     padding=1,
-                    bias_attr=ParamAttr(regularizer=L2Decay(0.))))
+                    weight_attr=ParamAttr(name=name + '.conv.weights'),
+                    bias_attr=ParamAttr(
+                        name=name + '.conv.bias', regularizer=L2Decay(0.))))
             self.identify_outputs.append(identify_output)
 
             loss_p_cls = self.add_sublayer('cls.{}'.format(i),

+ 7 - 3
paddlex/ppdet/modeling/reid/pyramidal_embedding.py

@@ -89,12 +89,16 @@ class PCBPyramid(nn.Layer):
             if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                 idx_levels += 1
 
+            name = "Linear_branch_id_{}".format(idx_branches)
             fc = nn.Linear(
                 in_features=num_conv_out_channels,
                 out_features=self.num_classes,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.001)),
-                bias_attr=ParamAttr(initializer=Constant(value=0.)))
+                weight_attr=ParamAttr(
+                    name=name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.001)),
+                bias_attr=ParamAttr(
+                    name=name + "_bias", initializer=Constant(value=0.)))
             pyramid_fc_list.append(fc)
         return pyramid_conv_list, pyramid_fc_list
 

+ 12 - 2
paddlex/ppdet/modeling/reid/resnet.py

@@ -50,13 +50,23 @@ class ConvBNLayer(nn.Layer):
             dilation=dilation,
             groups=groups,
             weight_attr=ParamAttr(
+                name=name + "_weights",
                 learning_rate=lr_mult,
                 initializer=Normal(0, math.sqrt(2. / conv_stdv))),
             bias_attr=False,
             data_format=data_format)
-
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
         self._batch_norm = nn.BatchNorm(
-            num_filters, act=act, data_layout=data_format)
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance",
+            data_layout=data_format)
 
     def forward(self, inputs):
         y = self._conv(inputs)

+ 0 - 10
paddlex/ppdet/modeling/tests/test_architectures.py

@@ -55,15 +55,5 @@ class TestSSD(TestFasterRCNN):
         self.cfg_file = 'configs/ssd/ssd_vgg16_300_240e_voc.yml'
 
 
-class TestGFL(TestFasterRCNN):
-    def set_config(self):
-        self.cfg_file = 'configs/gfl/gfl_r50_fpn_1x_coco.yml'
-
-
-class TestPicoDet(TestFasterRCNN):
-    def set_config(self):
-        self.cfg_file = 'configs/picodet/picodet_s_shufflenetv2_320_coco.yml'
-
-
 if __name__ == '__main__':
     unittest.main()

+ 2 - 5
paddlex/ppdet/utils/download.py

@@ -99,10 +99,7 @@ DATASETS = {
         'https://paddledet.bj.bcebos.com/data/spine_coco.tar',
         '7ed69ae73f842cd2a8cf4f58dc3c5535', ), ], ['annotations', 'images']),
     'mot': (),
-    'objects365': (),
-    'coco_ce': ([(
-        'https://paddledet.bj.bcebos.com/data/coco_ce.tar',
-        'eadd1b79bc2f069f2744b1dd4e0c0329', ), ], [])
+    'objects365': ()
 }
 
 DOWNLOAD_RETRY_LIMIT = 3
@@ -140,7 +137,7 @@ def get_config_path(url):
 
     # 2. get url
     try:
-        from ppdet import __version__ as version
+        from paddlex.ppdet import __version__ as version
     except ImportError:
         version = None
 

+ 9 - 0
paddlex_restful/restful/app.py

@@ -207,6 +207,9 @@ def get_image_file():
     data = request.get_json()
     if request.method == 'GET':
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
 
 
@@ -584,6 +587,9 @@ def task_evaluate_file():
     if request.method == 'GET':
         if 'path' in data:
             ret = data['path']
+            assert os.path.abspath(ret).startswith(
+                os.path.abspath(SD.workspace_dir)
+            ) and ".." not in ret, "Illegal path {}.".format(ret)
             return send_file(ret)
         else:
             from .project.task import get_evaluate_result
@@ -920,6 +926,9 @@ def model_file():
     data = request.get_json()
     if request.method == 'GET':
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
 
 

+ 9 - 0
static/paddlex_restful/restful/app.py

@@ -206,6 +206,9 @@ def get_image_file():
     data = request.get_json()
     if request.method == 'GET':
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
 
 
@@ -583,6 +586,9 @@ def task_evaluate_file():
     if request.method == 'GET':
         if 'path' in data:
             ret = data['path']
+            assert os.path.abspath(ret).startswith(
+                os.path.abspath(SD.workspace_dir)
+            ) and ".." not in ret, "Illegal path {}.".format(ret)
             return send_file(ret)
         else:
             from .project.task import get_evaluate_result
@@ -919,6 +925,9 @@ def model_file():
     data = request.get_json()
     if request.method == 'GET':
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)