Explorar o código

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleX into develop_qh

FlyingQianMM hai 4 anos
pai
achega
a4cac75a0d
Modificáronse 50 ficheiros con 385 adicións e 3303 borrados
  1. 1 1
      PaddleDetection
  2. 1 1
      paddlex/cv/datasets/coco.py
  3. 4 2
      paddlex/cv/datasets/voc.py
  4. 5 1
      paddlex/cv/models/base.py
  5. 157 1
      paddlex/cv/models/detector.py
  6. 5 2
      paddlex/ppdet/data/source/category.py
  7. 5 6
      paddlex/ppdet/data/source/coco.py
  8. 17 14
      paddlex/ppdet/data/source/voc.py
  9. 0 267
      paddlex/ppdet/data/transform/atss_assigner.py
  10. 4 132
      paddlex/ppdet/data/transform/batch_operators.py
  11. 4 6
      paddlex/ppdet/engine/export_utils.py
  12. 2 2
      paddlex/ppdet/engine/tracker.py
  13. 11 14
      paddlex/ppdet/engine/trainer.py
  14. 2 39
      paddlex/ppdet/metrics/keypoint_metrics.py
  15. 0 192
      paddlex/ppdet/metrics/mot_eval_utils.py
  16. 2 2
      paddlex/ppdet/metrics/mot_metrics.py
  17. 0 4
      paddlex/ppdet/modeling/architectures/__init__.py
  18. 0 87
      paddlex/ppdet/modeling/architectures/gfl.py
  19. 4 5
      paddlex/ppdet/modeling/architectures/keypoint_hrnet.py
  20. 0 91
      paddlex/ppdet/modeling/architectures/picodet.py
  21. 0 4
      paddlex/ppdet/modeling/backbones/__init__.py
  22. 14 3
      paddlex/ppdet/modeling/backbones/blazenet.py
  23. 10 4
      paddlex/ppdet/modeling/backbones/ghostnet.py
  24. 19 7
      paddlex/ppdet/modeling/backbones/hrnet.py
  25. 0 886
      paddlex/ppdet/modeling/backbones/lite_hrnet.py
  26. 20 6
      paddlex/ppdet/modeling/backbones/mobilenet_v3.py
  27. 0 262
      paddlex/ppdet/modeling/backbones/shufflenet_v2.py
  28. 7 2
      paddlex/ppdet/modeling/backbones/vgg.py
  29. 1 45
      paddlex/ppdet/modeling/bbox_utils.py
  30. 0 4
      paddlex/ppdet/modeling/heads/__init__.py
  31. 2 1
      paddlex/ppdet/modeling/heads/centernet_head.py
  32. 5 3
      paddlex/ppdet/modeling/heads/detr_head.py
  33. 19 8
      paddlex/ppdet/modeling/heads/fcos_head.py
  34. 0 476
      paddlex/ppdet/modeling/heads/gfl_head.py
  35. 0 329
      paddlex/ppdet/modeling/heads/pico_head.py
  36. 0 2
      paddlex/ppdet/modeling/losses/__init__.py
  37. 0 214
      paddlex/ppdet/modeling/losses/gfocal_loss.py
  38. 4 5
      paddlex/ppdet/modeling/losses/keypoint_loss.py
  39. 0 2
      paddlex/ppdet/modeling/necks/__init__.py
  40. 14 3
      paddlex/ppdet/modeling/necks/blazeface_fpn.py
  41. 4 0
      paddlex/ppdet/modeling/necks/hrfpn.py
  42. 0 135
      paddlex/ppdet/modeling/necks/pan.py
  43. 0 12
      paddlex/ppdet/modeling/ops.py
  44. 3 1
      paddlex/ppdet/modeling/reid/jde_embedding_head.py
  45. 7 3
      paddlex/ppdet/modeling/reid/pyramidal_embedding.py
  46. 12 2
      paddlex/ppdet/modeling/reid/resnet.py
  47. 0 10
      paddlex/ppdet/modeling/tests/test_architectures.py
  48. 2 5
      paddlex/ppdet/utils/download.py
  49. 9 0
      paddlex_restful/restful/app.py
  50. 9 0
      static/paddlex_restful/restful/app.py

+ 1 - 1
PaddleDetection

@@ -1 +1 @@
-Subproject commit 3bdf2671f3188de3c4158c9056a46e949cf02eb8
+Subproject commit 5b949596ea7603cd79e3fc9067766bbc79a3e93d

+ 1 - 1
paddlex/cv/datasets/coco.py

@@ -196,7 +196,7 @@ class CocoDetection(VOCDetection):
             logging.error(
             logging.error(
                 "No coco record found in %s' % (ann_file)", exit=True)
                 "No coco record found in %s' % (ann_file)", exit=True)
         self.pos_num = len(self.file_list)
         self.pos_num = len(self.file_list)
-        if self.allow_empty:
+        if self.allow_empty and neg_file_list:
             self.file_list += self._sample_empty(neg_file_list)
             self.file_list += self._sample_empty(neg_file_list)
         logging.info(
         logging.info(
             "{} samples in file {}, including {} positive samples and {} negative samples.".
             "{} samples in file {}, including {} positive samples and {} negative samples.".

+ 4 - 2
paddlex/cv/datasets/voc.py

@@ -290,7 +290,7 @@ class VOCDetection(Dataset):
             logging.error(
             logging.error(
                 "No voc record found in %s' % (file_list)", exit=True)
                 "No voc record found in %s' % (file_list)", exit=True)
         self.pos_num = len(self.file_list)
         self.pos_num = len(self.file_list)
-        if self.allow_empty:
+        if self.allow_empty and neg_file_list:
             self.file_list += self._sample_empty(neg_file_list)
             self.file_list += self._sample_empty(neg_file_list)
         logging.info(
         logging.info(
             "{} samples in file {}, including {} positive samples and {} negative samples.".
             "{} samples in file {}, including {} positive samples and {} negative samples.".
@@ -423,7 +423,9 @@ class VOCDetection(Dataset):
                 **
                 **
                 label_info
                 label_info
             })
             })
-        self.file_list += self._sample_empty(neg_file_list)
+        if neg_file_list:
+            self.allow_empty = True
+            self.file_list += self._sample_empty(neg_file_list)
         logging.info(
         logging.info(
             "{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
             "{} negative samples added. Dataset contains {} positive samples and {} negative samples.".
             format(
             format(

+ 5 - 1
paddlex/cv/models/base.py

@@ -271,7 +271,11 @@ class BaseModel:
             transforms=train_dataset.transforms,
             transforms=train_dataset.transforms,
             mode='train')
             mode='train')
 
 
-        nranks = paddle.distributed.get_world_size()
+        if "RCNN" in self.__class__.__name__ and train_dataset.pos_num < len(
+                train_dataset.file_list):
+            nranks = 1
+        else:
+            nranks = paddle.distributed.get_world_size()
         local_rank = paddle.distributed.get_rank()
         local_rank = paddle.distributed.get_rank()
         if nranks > 1:
         if nranks > 1:
             find_unused_parameters = getattr(self, 'find_unused_parameters',
             find_unused_parameters = getattr(self, 'find_unused_parameters',

+ 157 - 1
paddlex/cv/models/detector.py

@@ -18,7 +18,6 @@ import collections
 import copy
 import copy
 import os
 import os
 import os.path as osp
 import os.path as osp
-import six
 import numpy as np
 import numpy as np
 import paddle
 import paddle
 from paddle.static import InputSpec
 from paddle.static import InputSpec
@@ -29,6 +28,7 @@ import paddlex.utils.logging as logging
 from paddlex.cv.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
 from paddlex.cv.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
 from paddlex.cv.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, _BatchPadding, _Gt2YoloTarget
 from paddlex.cv.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, _BatchPadding, _Gt2YoloTarget
 from paddlex.cv.transforms import arrange_transforms
 from paddlex.cv.transforms import arrange_transforms
+from paddlex.utils import get_single_card_bs
 from .base import BaseModel
 from .base import BaseModel
 from .utils.det_metrics import VOCMetric, COCOMetric
 from .utils.det_metrics import VOCMetric, COCOMetric
 from .utils.ema import ExponentialMovingAverage
 from .utils.ema import ExponentialMovingAverage
@@ -975,6 +975,84 @@ class FasterRCNN(BaseDetector):
         super(FasterRCNN, self).__init__(
         super(FasterRCNN, self).__init__(
             model_name='FasterRCNN', num_classes=num_classes, **params)
             model_name='FasterRCNN', num_classes=num_classes, **params)
 
 
+    def train(self,
+              num_epochs,
+              train_dataset,
+              train_batch_size=64,
+              eval_dataset=None,
+              optimizer=None,
+              save_interval_epochs=1,
+              log_interval_steps=10,
+              save_dir='output',
+              pretrain_weights='IMAGENET',
+              learning_rate=.001,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
+              lr_decay_epochs=(216, 243),
+              lr_decay_gamma=0.1,
+              metric=None,
+              use_ema=False,
+              early_stop=False,
+              early_stop_patience=5,
+              use_vdl=True,
+              resume_checkpoint=None):
+        """
+        Train the model.
+        Args:
+            num_epochs(int): The number of epochs.
+            train_dataset(paddlex.dataset): Training dataset.
+            train_batch_size(int, optional): Total batch size among all cards used in training. Defaults to 64.
+            eval_dataset(paddlex.dataset, optional):
+                Evaluation dataset. If None, the model will not be evaluated during training process. Defaults to None.
+            optimizer(paddle.optimizer.Optimizer or None, optional):
+                Optimizer used for training. If None, a default optimizer is used. Defaults to None.
+            save_interval_epochs(int, optional): Epoch interval for saving the model. Defaults to 1.
+            log_interval_steps(int, optional): Step interval for printing training information. Defaults to 10.
+            save_dir(str, optional): Directory to save the model. Defaults to 'output'.
+            pretrain_weights(str or None, optional):
+                None or name/path of pretrained weights. If None, no pretrained weights will be loaded. Defaults to 'IMAGENET'.
+            learning_rate(float, optional): Learning rate for training. Defaults to .001.
+            warmup_steps(int, optional): The number of steps of warm-up training. Defaults to 0.
+            warmup_start_lr(float, optional): Start learning rate of warm-up training. Defaults to 0..
+            lr_decay_epochs(list or tuple, optional): Epoch milestones for learning rate decay. Defaults to (216, 243).
+            lr_decay_gamma(float, optional): Gamma coefficient of learning rate decay. Defaults to .1.
+            metric({'VOC', 'COCO', None}, optional):
+                Evaluation metric. If None, determine the metric according to the dataset format. Defaults to None.
+            use_ema(bool, optional): Whether to use exponential moving average strategy. Defaults to False.
+            early_stop(bool, optional): Whether to adopt early stop strategy. Defaults to False.
+            early_stop_patience(int, optional): Early stop patience. Defaults to 5.
+            use_vdl(bool, optional): Whether to use VisualDL to monitor the training process. Defaults to True.
+            resume_checkpoint(str or None, optional): The path of the checkpoint to resume training from.
+                If None, no training checkpoint will be resumed. At most one of `resume_checkpoint` and
+                `pretrain_weights` can be set simultaneously. Defaults to None.
+        """
+        if train_dataset.pos_num < len(train_dataset.file_list):
+            train_dataset.num_workers = 0
+            if train_batch_size != 1:
+                train_batch_size = 1
+                logging.warning(
+                    "Training RCNN models with negative samples only support batch size equals to 1 "
+                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
+                )
+            nranks = paddle.distributed.get_world_size()
+            local_rank = paddle.distributed.get_rank()
+            # single card training
+            if nranks < 2 or local_rank == 0:
+                super(FasterRCNN, self).train(
+                    num_epochs, train_dataset, train_batch_size, eval_dataset,
+                    optimizer, save_interval_epochs, log_interval_steps,
+                    save_dir, pretrain_weights, learning_rate, warmup_steps,
+                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
+                    use_ema, early_stop, early_stop_patience, use_vdl,
+                    resume_checkpoint)
+        else:
+            super(FasterRCNN, self).train(
+                num_epochs, train_dataset, train_batch_size, eval_dataset,
+                optimizer, save_interval_epochs, log_interval_steps, save_dir,
+                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+                early_stop_patience, use_vdl, resume_checkpoint)
+
     def _compose_batch_transform(self, transforms, mode='train'):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
         if mode == 'train':
             default_batch_transforms = [
             default_batch_transforms = [
@@ -1755,6 +1833,84 @@ class MaskRCNN(BaseDetector):
         super(MaskRCNN, self).__init__(
         super(MaskRCNN, self).__init__(
             model_name='MaskRCNN', num_classes=num_classes, **params)
             model_name='MaskRCNN', num_classes=num_classes, **params)
 
 
+    def train(self,
+              num_epochs,
+              train_dataset,
+              train_batch_size=64,
+              eval_dataset=None,
+              optimizer=None,
+              save_interval_epochs=1,
+              log_interval_steps=10,
+              save_dir='output',
+              pretrain_weights='IMAGENET',
+              learning_rate=.001,
+              warmup_steps=0,
+              warmup_start_lr=0.0,
+              lr_decay_epochs=(216, 243),
+              lr_decay_gamma=0.1,
+              metric=None,
+              use_ema=False,
+              early_stop=False,
+              early_stop_patience=5,
+              use_vdl=True,
+              resume_checkpoint=None):
+        """
+        Train the model.
+        Args:
+            num_epochs(int): The number of epochs.
+            train_dataset(paddlex.dataset): Training dataset.
+            train_batch_size(int, optional): Total batch size among all cards used in training. Defaults to 64.
+            eval_dataset(paddlex.dataset, optional):
+                Evaluation dataset. If None, the model will not be evaluated during training process. Defaults to None.
+            optimizer(paddle.optimizer.Optimizer or None, optional):
+                Optimizer used for training. If None, a default optimizer is used. Defaults to None.
+            save_interval_epochs(int, optional): Epoch interval for saving the model. Defaults to 1.
+            log_interval_steps(int, optional): Step interval for printing training information. Defaults to 10.
+            save_dir(str, optional): Directory to save the model. Defaults to 'output'.
+            pretrain_weights(str or None, optional):
+                None or name/path of pretrained weights. If None, no pretrained weights will be loaded. Defaults to 'IMAGENET'.
+            learning_rate(float, optional): Learning rate for training. Defaults to .001.
+            warmup_steps(int, optional): The number of steps of warm-up training. Defaults to 0.
+            warmup_start_lr(float, optional): Start learning rate of warm-up training. Defaults to 0..
+            lr_decay_epochs(list or tuple, optional): Epoch milestones for learning rate decay. Defaults to (216, 243).
+            lr_decay_gamma(float, optional): Gamma coefficient of learning rate decay. Defaults to .1.
+            metric({'VOC', 'COCO', None}, optional):
+                Evaluation metric. If None, determine the metric according to the dataset format. Defaults to None.
+            use_ema(bool, optional): Whether to use exponential moving average strategy. Defaults to False.
+            early_stop(bool, optional): Whether to adopt early stop strategy. Defaults to False.
+            early_stop_patience(int, optional): Early stop patience. Defaults to 5.
+            use_vdl(bool, optional): Whether to use VisualDL to monitor the training process. Defaults to True.
+            resume_checkpoint(str or None, optional): The path of the checkpoint to resume training from.
+                If None, no training checkpoint will be resumed. At most one of `resume_checkpoint` and
+                `pretrain_weights` can be set simultaneously. Defaults to None.
+        """
+        if train_dataset.pos_num < len(train_dataset.file_list):
+            train_dataset.num_workers = 0
+            if train_batch_size != 1:
+                train_batch_size = 1
+                logging.warning(
+                    "Training RCNN models with negative samples only support batch size equals to 1 "
+                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
+                )
+            nranks = paddle.distributed.get_world_size()
+            local_rank = paddle.distributed.get_rank()
+            # single card training
+            if nranks < 2 or local_rank == 0:
+                super(MaskRCNN, self).train(
+                    num_epochs, train_dataset, train_batch_size, eval_dataset,
+                    optimizer, save_interval_epochs, log_interval_steps,
+                    save_dir, pretrain_weights, learning_rate, warmup_steps,
+                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
+                    use_ema, early_stop, early_stop_patience, use_vdl,
+                    resume_checkpoint)
+        else:
+            super(MaskRCNN, self).train(
+                num_epochs, train_dataset, train_batch_size, eval_dataset,
+                optimizer, save_interval_epochs, log_interval_steps, save_dir,
+                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+                early_stop_patience, use_vdl, resume_checkpoint)
+
     def _compose_batch_transform(self, transforms, mode='train'):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
         if mode == 'train':
             default_batch_transforms = [
             default_batch_transforms = [

+ 5 - 2
paddlex/ppdet/data/source/category.py

@@ -90,16 +90,19 @@ def get_categories(metric_type, anno_file=None, arch=None):
     elif metric_type.lower() in ['mot', 'motdet', 'reid']:
     elif metric_type.lower() in ['mot', 'motdet', 'reid']:
         return _mot_category()
         return _mot_category()
 
 
+    elif metric_type.lower() in ['kitti', 'bdd100k']:
+        return _mot_category(category='car')
+
     else:
     else:
         raise ValueError("unknown metric type {}".format(metric_type))
         raise ValueError("unknown metric type {}".format(metric_type))
 
 
 
 
-def _mot_category():
+def _mot_category(category='person'):
     """
     """
     Get class id to category id map and category id
     Get class id to category id map and category id
     to category name map of mot dataset
     to category name map of mot dataset
     """
     """
-    label_map = {'person': 0}
+    label_map = {category: 0}
     label_map = sorted(label_map.items(), key=lambda x: x[1])
     label_map = sorted(label_map.items(), key=lambda x: x[1])
     cats = [l[0] for l in label_map]
     cats = [l[0] for l in label_map]
 
 

+ 5 - 6
paddlex/ppdet/data/source/coco.py

@@ -181,7 +181,7 @@ class COCODataSet(DetDataset):
                 gt_poly = [None] * num_bbox
                 gt_poly = [None] * num_bbox
 
 
                 has_segmentation = False
                 has_segmentation = False
-                for i, box in enumerate(bboxes):
+                for i, box in reversed(list(enumerate(bboxes))):
                     catid = box['category_id']
                     catid = box['category_id']
                     gt_class[i][0] = self.catid2clsid[catid]
                     gt_class[i][0] = self.catid2clsid[catid]
                     gt_bbox[i, :] = box['clean_bbox']
                     gt_bbox[i, :] = box['clean_bbox']
@@ -195,11 +195,10 @@ class COCODataSet(DetDataset):
                     elif 'segmentation' in box and box['segmentation']:
                     elif 'segmentation' in box and box['segmentation']:
                         if not np.array(box['segmentation']
                         if not np.array(box['segmentation']
                                         ).size > 0 and not self.allow_empty:
                                         ).size > 0 and not self.allow_empty:
-                            bboxes.pop(i)
                             gt_poly.pop(i)
                             gt_poly.pop(i)
-                            np.delete(is_crowd, i)
-                            np.delete(gt_class, i)
-                            np.delete(gt_bbox, i)
+                            is_crowd = np.delete(is_crowd, i)
+                            gt_class = np.delete(gt_class, i)
+                            gt_bbox = np.delete(gt_bbox, i)
                         else:
                         else:
                             gt_poly[i] = box['segmentation']
                             gt_poly[i] = box['segmentation']
                         has_segmentation = True
                         has_segmentation = True
@@ -245,7 +244,7 @@ class COCODataSet(DetDataset):
                 break
                 break
         assert ct > 0, 'not found any coco record in %s' % (anno_path)
         assert ct > 0, 'not found any coco record in %s' % (anno_path)
         logger.debug('{} samples in file {}'.format(ct, anno_path))
         logger.debug('{} samples in file {}'.format(ct, anno_path))
-        if len(empty_records) > 0:
+        if self.allow_empty and len(empty_records) > 0:
             empty_records = self._sample_empty(empty_records, len(records))
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
             records += empty_records
         self.roidbs = records
         self.roidbs = records

+ 17 - 14
paddlex/ppdet/data/source/voc.py

@@ -131,11 +131,13 @@ class VOCDataSet(DetDataset):
                         'Illegal width: {} or height: {} in annotation, '
                         'Illegal width: {} or height: {} in annotation, '
                         'and {} will be ignored'.format(im_w, im_h, xml_file))
                         'and {} will be ignored'.format(im_w, im_h, xml_file))
                     continue
                     continue
-                gt_bbox = []
-                gt_class = []
-                gt_score = []
-                difficult = []
-                for i, obj in enumerate(objs):
+
+                num_bbox, i = len(objs), 0
+                gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+                gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+                gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
+                difficult = np.zeros((num_bbox, 1), dtype=np.int32)
+                for obj in objs:
                     cname = obj.find('name').text
                     cname = obj.find('name').text
 
 
                     # user dataset may not contain difficult field
                     # user dataset may not contain difficult field
@@ -152,19 +154,20 @@ class VOCDataSet(DetDataset):
                     x2 = min(im_w - 1, x2)
                     x2 = min(im_w - 1, x2)
                     y2 = min(im_h - 1, y2)
                     y2 = min(im_h - 1, y2)
                     if x2 > x1 and y2 > y1:
                     if x2 > x1 and y2 > y1:
-                        gt_bbox.append([x1, y1, x2, y2])
-                        gt_class.append([cname2cid[cname]])
-                        gt_score.append([1.])
-                        difficult.append([_difficult])
+                        gt_bbox[i, :] = [x1, y1, x2, y2]
+                        gt_class[i, 0] = cname2cid[cname]
+                        gt_score[i, 0] = 1.
+                        difficult[i, 0] = _difficult
+                        i += 1
                     else:
                     else:
                         logger.warning(
                         logger.warning(
                             'Found an invalid bbox in annotations: xml_file: {}'
                             'Found an invalid bbox in annotations: xml_file: {}'
                             ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                             ', x1: {}, y1: {}, x2: {}, y2: {}.'.format(
                                 xml_file, x1, y1, x2, y2))
                                 xml_file, x1, y1, x2, y2))
-                gt_bbox = np.array(gt_bbox).astype('float32')
-                gt_class = np.array(gt_class).astype('int32')
-                gt_score = np.array(gt_score).astype('float32')
-                difficult = np.array(difficult).astype('int32')
+                gt_bbox = gt_bbox[:i, :]
+                gt_class = gt_class[:i, :]
+                gt_score = gt_score[:i, :]
+                difficult = difficult[:i, :]
 
 
                 voc_rec = {
                 voc_rec = {
                     'im_file': img_file,
                     'im_file': img_file,
@@ -193,7 +196,7 @@ class VOCDataSet(DetDataset):
                     break
                     break
         assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
         assert ct > 0, 'not found any voc record in %s' % (self.anno_path)
         logger.debug('{} samples in file {}'.format(ct, anno_path))
         logger.debug('{} samples in file {}'.format(ct, anno_path))
-        if len(empty_records) > 0:
+        if self.allow_empty and len(empty_records) > 0:
             empty_records = self._sample_empty(empty_records, len(records))
             empty_records = self._sample_empty(empty_records, len(records))
             records += empty_records
             records += empty_records
         self.roidbs, self.cname2cid = records, cname2cid
         self.roidbs, self.cname2cid = records, cname2cid

+ 0 - 267
paddlex/ppdet/data/transform/atss_assigner.py

@@ -1,267 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from paddlex.ppdet.utils.logger import setup_logger
-logger = setup_logger(__name__)
-
-
-def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):
-    """Calculate overlap between two set of bboxes.
-    If ``is_aligned `` is ``False``, then calculate the overlaps between each
-    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned
-    pair of bboxes1 and bboxes2.
-    Args:
-        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.
-        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.
-            B indicates the batch dim, in shape (B1, B2, ..., Bn).
-            If ``is_aligned `` is ``True``, then m and n must be equal.
-        mode (str): "iou" (intersection over union) or "iof" (intersection over
-            foreground).
-        is_aligned (bool, optional): If True, then m and n must be equal.
-            Default False.
-        eps (float, optional): A value added to the denominator for numerical
-            stability. Default 1e-6.
-    Returns:
-        Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)
-    """
-    assert mode in ['iou', 'iof', 'giou'], 'Unsupported mode {}'.format(mode)
-    # Either the boxes are empty or the length of boxes's last dimenstion is 4
-    assert (bboxes1.shape[-1] == 4 or bboxes1.shape[0] == 0)
-    assert (bboxes2.shape[-1] == 4 or bboxes2.shape[0] == 0)
-
-    # Batch dim must be the same
-    # Batch dim: (B1, B2, ... Bn)
-    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]
-    batch_shape = bboxes1.shape[:-2]
-
-    rows = bboxes1.shape[-2] if bboxes1.shape[0] > 0 else 0
-    cols = bboxes2.shape[-2] if bboxes2.shape[0] > 0 else 0
-    if is_aligned:
-        assert rows == cols
-
-    if rows * cols == 0:
-        if is_aligned:
-            return np.random.random(batch_shape + (rows, ))
-        else:
-            return np.random.random(batch_shape + (rows, cols))
-
-    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
-        bboxes1[..., 3] - bboxes1[..., 1])
-    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
-        bboxes2[..., 3] - bboxes2[..., 1])
-
-    if is_aligned:
-        lt = np.maximum(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]
-        rb = np.minimum(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]
-
-        wh = (rb - lt).clip(min=0)  # [B, rows, 2]
-        overlap = wh[..., 0] * wh[..., 1]
-
-        if mode in ['iou', 'giou']:
-            union = area1 + area2 - overlap
-        else:
-            union = area1
-        if mode == 'giou':
-            enclosed_lt = np.minimum(bboxes1[..., :2], bboxes2[..., :2])
-            enclosed_rb = np.maximum(bboxes1[..., 2:], bboxes2[..., 2:])
-    else:
-        lt = np.maximum(bboxes1[..., :, None, :2],
-                        bboxes2[..., None, :, :2])  # [B, rows, cols, 2]
-        rb = np.minimum(bboxes1[..., :, None, 2:],
-                        bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]
-
-        wh = (rb - lt).clip(min=0)  # [B, rows, cols, 2]
-        overlap = wh[..., 0] * wh[..., 1]
-
-        if mode in ['iou', 'giou']:
-            union = area1[..., None] + area2[..., None, :] - overlap
-        else:
-            union = area1[..., None]
-        if mode == 'giou':
-            enclosed_lt = np.minimum(bboxes1[..., :, None, :2],
-                                     bboxes2[..., None, :, :2])
-            enclosed_rb = np.maximum(bboxes1[..., :, None, 2:],
-                                     bboxes2[..., None, :, 2:])
-
-    eps = np.array([eps])
-    union = np.maximum(union, eps)
-    ious = overlap / union
-    if mode in ['iou', 'iof']:
-        return ious
-    # calculate gious
-    enclose_wh = (enclosed_rb - enclosed_lt).clip(min=0)
-    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
-    enclose_area = np.maximum(enclose_area, eps)
-    gious = ious - (enclose_area - union) / enclose_area
-    return gious
-
-
-def topk_(input, k, axis=1, largest=True):
-    x = -input if largest else input
-    if axis == 0:
-        row_index = np.arange(input.shape[1 - axis])
-        topk_index = np.argpartition(x, k, axis=axis)[0:k, :]
-        topk_data = x[topk_index, row_index]
-
-        topk_index_sort = np.argsort(topk_data, axis=axis)
-        topk_data_sort = topk_data[topk_index_sort, row_index]
-        topk_index_sort = topk_index[0:k, :][topk_index_sort, row_index]
-    else:
-        column_index = np.arange(x.shape[1 - axis])[:, None]
-        topk_index = np.argpartition(x, k, axis=axis)[:, 0:k]
-        topk_data = x[column_index, topk_index]
-        topk_data = -topk_data if largest else topk_data
-        topk_index_sort = np.argsort(topk_data, axis=axis)
-        topk_data_sort = topk_data[column_index, topk_index_sort]
-        topk_index_sort = topk_index[:, 0:k][column_index, topk_index_sort]
-
-    return topk_data_sort, topk_index_sort
-
-
-class ATSSAssigner(object):
-    """Assign a corresponding gt bbox or background to each bbox.
-
-    Each proposals will be assigned with `0` or a positive integer
-    indicating the ground truth index.
-
-    - 0: negative sample, no assigned gt
-    - positive integer: positive sample, index (1-based) of assigned gt
-
-    Args:
-        topk (float): number of bbox selected in each level
-    """
-
-    def __init__(self, topk=9):
-        self.topk = topk
-
-    def __call__(self,
-                 bboxes,
-                 num_level_bboxes,
-                 gt_bboxes,
-                 gt_bboxes_ignore=None,
-                 gt_labels=None):
-        """Assign gt to bboxes.
-        The assignment is done in following steps
-        1. compute iou between all bbox (bbox of all pyramid levels) and gt
-        2. compute center distance between all bbox and gt
-        3. on each pyramid level, for each gt, select k bbox whose center
-           are closest to the gt center, so we total select k*l bbox as
-           candidates for each gt
-        4. get corresponding iou for the these candidates, and compute the
-           mean and std, set mean + std as the iou threshold
-        5. select these candidates whose iou are greater than or equal to
-           the threshold as postive
-        6. limit the positive sample's center in gt
-        Args:
-            bboxes (np.array): Bounding boxes to be assigned, shape(n, 4).
-            num_level_bboxes (List): num of bboxes in each level
-            gt_bboxes (np.array): Groundtruth boxes, shape (k, 4).
-            gt_bboxes_ignore (np.array, optional): Ground truth bboxes that are
-                labelled as `ignored`, e.g., crowd boxes in COCO.
-            gt_labels (np.array, optional): Label of gt_bboxes, shape (k, ).
-        """
-        bboxes = bboxes[:, :4]
-        num_gt, num_bboxes = gt_bboxes.shape[0], bboxes.shape[0]
-        # compute iou between all bbox and gt
-        overlaps = bbox_overlaps(bboxes, gt_bboxes)
-
-        # assign 0 by default
-        assigned_gt_inds = np.zeros((num_bboxes, ), dtype=np.int64)
-
-        if num_gt == 0 or num_bboxes == 0:
-            # No ground truth or boxes, return empty assignment
-            max_overlaps = np.zeros((num_bboxes, ))
-            if num_gt == 0:
-                # No truth, assign everything to background
-                assigned_gt_inds[:] = 0
-            if not np.any(gt_labels):
-                assigned_labels = None
-            else:
-                assigned_labels = -np.ones((num_bboxes, ), dtype=np.int64)
-            return assigned_gt_inds, max_overlaps, assigned_labels
-
-        # compute center distance between all bbox and gt
-        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0
-        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0
-        gt_points = np.stack((gt_cx, gt_cy), axis=1)
-
-        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0
-        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0
-        bboxes_points = np.stack((bboxes_cx, bboxes_cy), axis=1)
-
-        distances = np.sqrt(
-            np.power((bboxes_points[:, None, :] - gt_points[None, :, :]), 2)
-            .sum(-1))
-
-        # Selecting candidates based on the center distance
-        candidate_idxs = []
-        start_idx = 0
-        for bboxes_per_level in num_level_bboxes:
-            # on each pyramid level, for each gt,
-            # select k bbox whose center are closest to the gt center
-            end_idx = start_idx + bboxes_per_level
-            distances_per_level = distances[start_idx:end_idx, :]
-            selectable_k = min(self.topk, bboxes_per_level)
-            _, topk_idxs_per_level = topk_(
-                distances_per_level, selectable_k, axis=0, largest=False)
-            candidate_idxs.append(topk_idxs_per_level + start_idx)
-            start_idx = end_idx
-        candidate_idxs = np.concatenate(candidate_idxs, axis=0)
-
-        # get corresponding iou for the these candidates, and compute the
-        # mean and std, set mean + std as the iou threshold
-        candidate_overlaps = overlaps[candidate_idxs, np.arange(num_gt)]
-        overlaps_mean_per_gt = candidate_overlaps.mean(0)
-        overlaps_std_per_gt = candidate_overlaps.std(0)
-        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
-
-        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]
-
-        # limit the positive sample's center in gt
-        for gt_idx in range(num_gt):
-            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes
-        ep_bboxes_cx = np.broadcast_to(
-            bboxes_cx.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
-        ep_bboxes_cy = np.broadcast_to(
-            bboxes_cy.reshape(1, -1), [num_gt, num_bboxes]).reshape(-1)
-        candidate_idxs = candidate_idxs.reshape(-1)
-
-        # calculate the left, top, right, bottom distance between positive
-        # bbox center and gt side
-        l_ = ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 0]
-        t_ = ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt) - gt_bboxes[:, 1]
-        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].reshape(-1, num_gt)
-        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].reshape(-1, num_gt)
-        is_in_gts = np.stack([l_, t_, r_, b_], axis=1).min(axis=1) > 0.01
-        is_pos = is_pos & is_in_gts
-
-        # if an anchor box is assigned to multiple gts,
-        # the one with the highest IoU will be selected.
-        overlaps_inf = -np.inf * np.ones_like(overlaps).T.reshape(-1)
-        index = candidate_idxs.reshape(-1)[is_pos.reshape(-1)]
-        overlaps_inf[index] = overlaps.T.reshape(-1)[index]
-        overlaps_inf = overlaps_inf.reshape(num_gt, -1).T
-
-        max_overlaps = overlaps_inf.max(axis=1)
-        argmax_overlaps = overlaps_inf.argmax(axis=1)
-        assigned_gt_inds[max_overlaps !=
-                         -np.inf] = argmax_overlaps[max_overlaps !=
-                                                    -np.inf] + 1
-
-        return assigned_gt_inds, max_overlaps

+ 4 - 132
paddlex/ppdet/data/transform/batch_operators.py

@@ -22,11 +22,9 @@ except Exception:
     from collections import Sequence
     from collections import Sequence
 
 
 import cv2
 import cv2
-import math
 import numpy as np
 import numpy as np
 from .operators import register_op, BaseOperator, Resize
 from .operators import register_op, BaseOperator, Resize
 from .op_helper import jaccard_overlap, gaussian2D
 from .op_helper import jaccard_overlap, gaussian2D
-from .atss_assigner import ATSSAssigner
 from scipy import ndimage
 from scipy import ndimage
 
 
 from paddlex.ppdet.modeling import bbox_utils
 from paddlex.ppdet.modeling import bbox_utils
@@ -35,8 +33,7 @@ logger = setup_logger(__name__)
 
 
 __all__ = [
 __all__ = [
     'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
     'PadBatch', 'BatchRandomResize', 'Gt2YoloTarget', 'Gt2FCOSTarget',
-    'Gt2TTFTarget', 'Gt2Solov2Target', 'Gt2SparseRCNNTarget', 'PadMaskBatch',
-    'Gt2GFLTarget'
+    'Gt2TTFTarget', 'Gt2Solov2Target', 'Gt2SparseRCNNTarget', 'PadMaskBatch'
 ]
 ]
 
 
 
 
@@ -181,6 +178,8 @@ class Gt2YoloTarget(BaseOperator):
         h, w = samples[0]['image'].shape[1:3]
         h, w = samples[0]['image'].shape[1:3]
         an_hw = np.array(self.anchors) / np.array([[w, h]])
         an_hw = np.array(self.anchors) / np.array([[w, h]])
         for sample in samples:
         for sample in samples:
+            # im, gt_bbox, gt_class, gt_score = sample
+            im = sample['image']
             gt_bbox = sample['gt_bbox']
             gt_bbox = sample['gt_bbox']
             gt_class = sample['gt_class']
             gt_class = sample['gt_class']
             if 'gt_score' not in sample:
             if 'gt_score' not in sample:
@@ -371,6 +370,7 @@ class Gt2FCOSTarget(BaseOperator):
             "object_sizes_of_interest', and 'downsample_ratios' should have same length."
             "object_sizes_of_interest', and 'downsample_ratios' should have same length."
 
 
         for sample in samples:
         for sample in samples:
+            # im, gt_bbox, gt_class, gt_score = sample
             im = sample['image']
             im = sample['image']
             bboxes = sample['gt_bbox']
             bboxes = sample['gt_bbox']
             gt_class = sample['gt_class']
             gt_class = sample['gt_class']
@@ -472,134 +472,6 @@ class Gt2FCOSTarget(BaseOperator):
 
 
 
 
 @register_op
 @register_op
-class Gt2GFLTarget(BaseOperator):
-    """
-    Generate GFocal loss targets by groud truth data
-    """
-
-    def __init__(self,
-                 num_classes=80,
-                 downsample_ratios=[8, 16, 32, 64, 128],
-                 grid_cell_scale=4,
-                 cell_offset=0):
-        super(Gt2GFLTarget, self).__init__()
-        self.num_classes = num_classes
-        self.downsample_ratios = downsample_ratios
-        self.grid_cell_scale = grid_cell_scale
-        self.cell_offset = cell_offset
-
-        self.assigner = ATSSAssigner()
-
-    def get_grid_cells(self, featmap_size, scale, stride, offset=0):
-        """
-        Generate grid cells of a feature map for target assignment.
-        Args:
-            featmap_size: Size of a single level feature map.
-            scale: Grid cell scale.
-            stride: Down sample stride of the feature map.
-            offset: Offset of grid cells.
-        return:
-            Grid_cells xyxy position. Size should be [feat_w * feat_h, 4]
-        """
-        cell_size = stride * scale
-        h, w = featmap_size
-        x_range = (np.arange(w, dtype=np.float32) + offset) * stride
-        y_range = (np.arange(h, dtype=np.float32) + offset) * stride
-        x, y = np.meshgrid(x_range, y_range)
-        y = y.flatten()
-        x = x.flatten()
-        grid_cells = np.stack(
-            [
-                x - 0.5 * cell_size, y - 0.5 * cell_size, x + 0.5 * cell_size,
-                y + 0.5 * cell_size
-            ],
-            axis=-1)
-        return grid_cells
-
-    def get_sample(self, assign_gt_inds, gt_bboxes):
-        pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
-        neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
-        pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
-
-        if gt_bboxes.size == 0:
-            # hack for index error case
-            assert pos_assigned_gt_inds.size == 0
-            pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
-        else:
-            if len(gt_bboxes.shape) < 2:
-                gt_bboxes = gt_bboxes.resize(-1, 4)
-            pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
-        return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
-
-    def __call__(self, samples, context=None):
-        assert len(samples) > 0
-        batch_size = len(samples)
-        # get grid cells of image
-        h, w = samples[0]['image'].shape[1:3]
-        multi_level_grid_cells = []
-        for stride in self.downsample_ratios:
-            featmap_size = (int(math.ceil(h / stride)),
-                            int(math.ceil(w / stride)))
-            multi_level_grid_cells.append(
-                self.get_grid_cells(featmap_size, self.grid_cell_scale, stride,
-                                    self.cell_offset))
-        mlvl_grid_cells_list = [
-            multi_level_grid_cells for i in range(batch_size)
-        ]
-        # pixel cell number of multi-level feature maps
-        num_level_cells = [
-            grid_cells.shape[0] for grid_cells in mlvl_grid_cells_list[0]
-        ]
-        num_level_cells_list = [num_level_cells] * batch_size
-        # concat all level cells and to a single array
-        for i in range(batch_size):
-            mlvl_grid_cells_list[i] = np.concatenate(mlvl_grid_cells_list[i])
-        # target assign on all images
-        for sample, grid_cells, num_level_cells in zip(
-                samples, mlvl_grid_cells_list, num_level_cells_list):
-            gt_bboxes = sample['gt_bbox']
-            gt_labels = sample['gt_class'].squeeze()
-            if gt_labels.size == 1:
-                gt_labels = np.array([gt_labels]).astype(np.int32)
-            gt_bboxes_ignore = None
-            assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
-                                              gt_bboxes, gt_bboxes_ignore,
-                                              gt_labels)
-            pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
-                assign_gt_inds, gt_bboxes)
-
-            num_cells = grid_cells.shape[0]
-            bbox_targets = np.zeros_like(grid_cells)
-            bbox_weights = np.zeros_like(grid_cells)
-            labels = np.ones([num_cells], dtype=np.int64) * self.num_classes
-            label_weights = np.zeros([num_cells], dtype=np.float32)
-
-            if len(pos_inds) > 0:
-                pos_bbox_targets = pos_gt_bboxes
-                bbox_targets[pos_inds, :] = pos_bbox_targets
-                bbox_weights[pos_inds, :] = 1.0
-                if not np.any(gt_labels):
-                    labels[pos_inds] = 0
-                else:
-                    labels[pos_inds] = gt_labels[pos_assigned_gt_inds]
-
-                label_weights[pos_inds] = 1.0
-            if len(neg_inds) > 0:
-                label_weights[neg_inds] = 1.0
-            sample['grid_cells'] = grid_cells
-            sample['labels'] = labels
-            sample['label_weights'] = label_weights
-            sample['bbox_targets'] = bbox_targets
-            sample['pos_num'] = max(pos_inds.size, 1)
-            sample.pop('is_crowd', None)
-            sample.pop('difficult', None)
-            sample.pop('gt_class', None)
-            sample.pop('gt_bbox', None)
-            sample.pop('gt_score', None)
-        return samples
-
-
-@register_op
 class Gt2TTFTarget(BaseOperator):
 class Gt2TTFTarget(BaseOperator):
     __shared__ = ['num_classes']
     __shared__ = ['num_classes']
     """
     """

+ 4 - 6
paddlex/ppdet/engine/export_utils.py

@@ -42,8 +42,6 @@ TRT_MIN_SUBGRAPH = {
     'DeepSORT': 3,
     'DeepSORT': 3,
     'JDE': 10,
     'JDE': 10,
     'FairMOT': 5,
     'FairMOT': 5,
-    'GFL': 16,
-    'PicoDet': 3,
 }
 }
 
 
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
 KEYPOINT_ARCH = ['HigherHRNet', 'TopDownHRNet']
@@ -118,11 +116,11 @@ def _dump_infer_config(config, path, image_shape, model):
             break
             break
     if not arch_state:
     if not arch_state:
         logger.error(
         logger.error(
-            'Architecture: {} is not supported for exporting model now.\n'.
-            format(infer_arch) +
-            'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
+            'Architecture: {} is not supported for exporting model now'.format(
+                infer_arch))
         os._exit(0)
         os._exit(0)
-    if 'Mask' in infer_arch:
+    if 'mask_head' in config[config['architecture']] and config[config[
+            'architecture']]['mask_head']:
         infer_cfg['mask'] = True
         infer_cfg['mask'] = True
     label_arch = 'detection_arch'
     label_arch = 'detection_arch'
     if infer_arch in KEYPOINT_ARCH:
     if infer_arch in KEYPOINT_ARCH:

+ 2 - 2
paddlex/ppdet/engine/tracker.py

@@ -333,7 +333,7 @@ class Tracker(object):
             if save_videos:
             if save_videos:
                 output_video_path = os.path.join(save_dir, '..',
                 output_video_path = os.path.join(save_dir, '..',
                                                  '{}_vis.mp4'.format(seq))
                                                  '{}_vis.mp4'.format(seq))
-                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(
+                cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                     save_dir, output_video_path)
                     save_dir, output_video_path)
                 os.system(cmd_str)
                 os.system(cmd_str)
                 logger.info('Save video in {}.'.format(output_video_path))
                 logger.info('Save video in {}.'.format(output_video_path))
@@ -451,7 +451,7 @@ class Tracker(object):
         if save_videos:
         if save_videos:
             output_video_path = os.path.join(save_dir, '..',
             output_video_path = os.path.join(save_dir, '..',
                                              '{}_vis.mp4'.format(seq))
                                              '{}_vis.mp4'.format(seq))
-            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {}'.format(
+            cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
                 save_dir, output_video_path)
                 save_dir, output_video_path)
             os.system(cmd_str)
             os.system(cmd_str)
             logger.info('Save video in {}'.format(output_video_path))
             logger.info('Save video in {}'.format(output_video_path))

+ 11 - 14
paddlex/ppdet/engine/trainer.py

@@ -228,27 +228,19 @@ class Trainer(object):
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset.check_or_download_dataset()
             eval_dataset.check_or_download_dataset()
             anno_file = eval_dataset.get_anno()
             anno_file = eval_dataset.get_anno()
-            save_prediction_only = self.cfg.get('save_prediction_only', False)
             self._metrics = [
             self._metrics = [
-                KeyPointTopDownCOCOEval(
-                    anno_file,
-                    len(eval_dataset),
-                    self.cfg.num_joints,
-                    self.cfg.save_dir,
-                    save_prediction_only=save_prediction_only)
+                KeyPointTopDownCOCOEval(anno_file,
+                                        len(eval_dataset), self.cfg.num_joints,
+                                        self.cfg.save_dir)
             ]
             ]
         elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
         elif self.cfg.metric == 'KeyPointTopDownMPIIEval':
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset = self.cfg['EvalDataset']
             eval_dataset.check_or_download_dataset()
             eval_dataset.check_or_download_dataset()
             anno_file = eval_dataset.get_anno()
             anno_file = eval_dataset.get_anno()
-            save_prediction_only = self.cfg.get('save_prediction_only', False)
             self._metrics = [
             self._metrics = [
-                KeyPointTopDownMPIIEval(
-                    anno_file,
-                    len(eval_dataset),
-                    self.cfg.num_joints,
-                    self.cfg.save_dir,
-                    save_prediction_only=save_prediction_only)
+                KeyPointTopDownMPIIEval(anno_file,
+                                        len(eval_dataset), self.cfg.num_joints,
+                                        self.cfg.save_dir)
             ]
             ]
         elif self.cfg.metric == 'MOTDet':
         elif self.cfg.metric == 'MOTDet':
             self._metrics = [JDEDetMetric(), ]
             self._metrics = [JDEDetMetric(), ]
@@ -303,6 +295,11 @@ class Trainer(object):
         assert self.mode == 'train', "Model not in 'train' mode"
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False
         Init_mark = False
 
 
+        # if validation in training is enabled, metrics should be re-init
+        if validate:
+            self._init_metrics(validate=validate)
+            self._reset_metrics()
+
         model = self.model
         model = self.model
         if self.cfg.get('fleet', False):
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)
             model = fleet.distributed_model(model)

+ 2 - 39
paddlex/ppdet/metrics/keypoint_metrics.py

@@ -20,8 +20,6 @@ from pycocotools.coco import COCO
 from pycocotools.cocoeval import COCOeval
 from pycocotools.cocoeval import COCOeval
 from ..modeling.keypoint_utils import oks_nms
 from ..modeling.keypoint_utils import oks_nms
 from scipy.io import loadmat, savemat
 from scipy.io import loadmat, savemat
-from paddlex.ppdet.utils.logger import setup_logger
-logger = setup_logger(__name__)
 
 
 __all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
 __all__ = ['KeyPointTopDownCOCOEval', 'KeyPointTopDownMPIIEval']
 
 
@@ -40,8 +38,7 @@ class KeyPointTopDownCOCOEval(object):
                  output_eval,
                  output_eval,
                  iou_type='keypoints',
                  iou_type='keypoints',
                  in_vis_thre=0.2,
                  in_vis_thre=0.2,
-                 oks_thre=0.9,
-                 save_prediction_only=False):
+                 oks_thre=0.9):
         super(KeyPointTopDownCOCOEval, self).__init__()
         super(KeyPointTopDownCOCOEval, self).__init__()
         self.coco = COCO(anno_file)
         self.coco = COCO(anno_file)
         self.num_samples = num_samples
         self.num_samples = num_samples
@@ -51,7 +48,6 @@ class KeyPointTopDownCOCOEval(object):
         self.oks_thre = oks_thre
         self.oks_thre = oks_thre
         self.output_eval = output_eval
         self.output_eval = output_eval
         self.res_file = os.path.join(output_eval, "keypoints_results.json")
         self.res_file = os.path.join(output_eval, "keypoints_results.json")
-        self.save_prediction_only = save_prediction_only
         self.reset()
         self.reset()
 
 
     def reset(self):
     def reset(self):
@@ -94,7 +90,6 @@ class KeyPointTopDownCOCOEval(object):
             os.makedirs(self.output_eval)
             os.makedirs(self.output_eval)
         with open(self.res_file, 'w') as f:
         with open(self.res_file, 'w') as f:
             json.dump(results, f, sort_keys=True, indent=4)
             json.dump(results, f, sort_keys=True, indent=4)
-            logger.info(f'The keypoint result is saved to {self.res_file}.')
         try:
         try:
             json.load(open(self.res_file))
             json.load(open(self.res_file))
         except Exception:
         except Exception:
@@ -183,10 +178,6 @@ class KeyPointTopDownCOCOEval(object):
         self.get_final_results(self.results['all_preds'],
         self.get_final_results(self.results['all_preds'],
                                self.results['all_boxes'],
                                self.results['all_boxes'],
                                self.results['image_path'])
                                self.results['image_path'])
-        if self.save_prediction_only:
-            logger.info(f'The keypoint result is saved to {self.res_file} '
-                        'and do not evaluate the mAP.')
-            return
         coco_dt = self.coco.loadRes(self.res_file)
         coco_dt = self.coco.loadRes(self.res_file)
         coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
         coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
         coco_eval.params.useSegm = None
         coco_eval.params.useSegm = None
@@ -200,8 +191,6 @@ class KeyPointTopDownCOCOEval(object):
         self.eval_results['keypoint'] = keypoint_stats
         self.eval_results['keypoint'] = keypoint_stats
 
 
     def log(self):
     def log(self):
-        if self.save_prediction_only:
-            return
         stats_names = [
         stats_names = [
             'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
             'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
             'AR .75', 'AR (M)', 'AR (L)'
             'AR .75', 'AR (M)', 'AR (L)'
@@ -224,12 +213,9 @@ class KeyPointTopDownMPIIEval(object):
                  num_samples,
                  num_samples,
                  num_joints,
                  num_joints,
                  output_eval,
                  output_eval,
-                 oks_thre=0.9,
-                 save_prediction_only=False):
+                 oks_thre=0.9):
         super(KeyPointTopDownMPIIEval, self).__init__()
         super(KeyPointTopDownMPIIEval, self).__init__()
         self.ann_file = anno_file
         self.ann_file = anno_file
-        self.res_file = os.path.join(output_eval, "keypoints_results.json")
-        self.save_prediction_only = save_prediction_only
         self.reset()
         self.reset()
 
 
     def reset(self):
     def reset(self):
@@ -253,32 +239,9 @@ class KeyPointTopDownMPIIEval(object):
         self.results.append(results)
         self.results.append(results)
 
 
     def accumulate(self):
     def accumulate(self):
-        self._mpii_keypoint_results_save()
-        if self.save_prediction_only:
-            logger.info(f'The keypoint result is saved to {self.res_file} '
-                        'and do not evaluate the mAP.')
-            return
-
         self.eval_results = self.evaluate(self.results)
         self.eval_results = self.evaluate(self.results)
 
 
-    def _mpii_keypoint_results_save(self):
-        results = []
-        for res in self.results:
-            if len(res) == 0:
-                continue
-            result = [{
-                'preds': res['preds'][k].tolist(),
-                'boxes': res['boxes'][k].tolist(),
-                'image_path': res['image_path'][k],
-            } for k in range(len(res))]
-            results.extend(result)
-        with open(self.res_file, 'w') as f:
-            json.dump(results, f, sort_keys=True, indent=4)
-            logger.info(f'The keypoint result is saved to {self.res_file}.')
-
     def log(self):
     def log(self):
-        if self.save_prediction_only:
-            return
         for item, value in self.eval_results.items():
         for item, value in self.eval_results.items():
             print("{} : {}".format(item, value))
             print("{} : {}".format(item, value))
 
 

+ 0 - 192
paddlex/ppdet/metrics/mot_eval_utils.py

@@ -1,192 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import numpy as np
-import copy
-import motmetrics as mm
-mm.lap.default_solver = 'lap'
-
-__all__ = [
-    'read_mot_results',
-    'unzip_objs',
-    'MOTEvaluator',
-]
-
-
-def read_mot_results(filename, is_gt=False, is_ignore=False):
-    valid_labels = {1}
-    ignore_labels = {2, 7, 8, 12}
-    results_dict = dict()
-    if os.path.isfile(filename):
-        with open(filename, 'r') as f:
-            for line in f.readlines():
-                linelist = line.split(',')
-                if len(linelist) < 7:
-                    continue
-                fid = int(linelist[0])
-                if fid < 1:
-                    continue
-                results_dict.setdefault(fid, list())
-
-                box_size = float(linelist[4]) * float(linelist[5])
-
-                if is_gt:
-                    if 'MOT16-' in filename or 'MOT17-' in filename:
-                        label = int(float(linelist[7]))
-                        mark = int(float(linelist[6]))
-                        if mark == 0 or label not in valid_labels:
-                            continue
-                    score = 1
-                elif is_ignore:
-                    if 'MOT16-' in filename or 'MOT17-' in filename:
-                        label = int(float(linelist[7]))
-                        vis_ratio = float(linelist[8])
-                        if label not in ignore_labels and vis_ratio >= 0:
-                            continue
-                    else:
-                        continue
-                    score = 1
-                else:
-                    score = float(linelist[6])
-
-                tlwh = tuple(map(float, linelist[2:6]))
-                target_id = int(linelist[1])
-
-                results_dict[fid].append((tlwh, target_id, score))
-    return results_dict
-
-
-"""
-labels={'ped', ...			    % 1
-        'person_on_vhcl', ...	% 2
-        'car', ...				% 3
-        'bicycle', ...			% 4
-        'mbike', ...			% 5
-        'non_mot_vhcl', ...		% 6
-        'static_person', ...	% 7
-        'distractor', ...		% 8
-        'occluder', ...			% 9
-        'occluder_on_grnd', ...	% 10
-        'occluder_full', ...	% 11
-        'reflection', ...		% 12
-        'crowd' ...			    % 13
-};
-"""
-
-
-def unzip_objs(objs):
-    if len(objs) > 0:
-        tlwhs, ids, scores = zip(*objs)
-    else:
-        tlwhs, ids, scores = [], [], []
-    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
-    return tlwhs, ids, scores
-
-
-class MOTEvaluator(object):
-    def __init__(self, data_root, seq_name, data_type):
-        self.data_root = data_root
-        self.seq_name = seq_name
-        self.data_type = data_type
-
-        self.load_annotations()
-        self.reset_accumulator()
-
-    def load_annotations(self):
-        assert self.data_type == 'mot'
-        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt',
-                                   'gt.txt')
-        self.gt_frame_dict = read_mot_results(gt_filename, is_gt=True)
-        self.gt_ignore_frame_dict = read_mot_results(
-            gt_filename, is_ignore=True)
-
-    def reset_accumulator(self):
-        self.acc = mm.MOTAccumulator(auto_id=True)
-
-    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
-        # results
-        trk_tlwhs = np.copy(trk_tlwhs)
-        trk_ids = np.copy(trk_ids)
-
-        # gts
-        gt_objs = self.gt_frame_dict.get(frame_id, [])
-        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
-
-        # ignore boxes
-        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
-        ignore_tlwhs = unzip_objs(ignore_objs)[0]
-
-        # remove ignored results
-        keep = np.ones(len(trk_tlwhs), dtype=bool)
-        iou_distance = mm.distances.iou_matrix(
-            ignore_tlwhs, trk_tlwhs, max_iou=0.5)
-        if len(iou_distance) > 0:
-            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
-            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
-            match_ious = iou_distance[match_is, match_js]
-
-            match_js = np.asarray(match_js, dtype=int)
-            match_js = match_js[np.logical_not(np.isnan(match_ious))]
-            keep[match_js] = False
-            trk_tlwhs = trk_tlwhs[keep]
-            trk_ids = trk_ids[keep]
-
-        # get distance matrix
-        iou_distance = mm.distances.iou_matrix(
-            gt_tlwhs, trk_tlwhs, max_iou=0.5)
-
-        # acc
-        self.acc.update(gt_ids, trk_ids, iou_distance)
-
-        if rtn_events and iou_distance.size > 0 and hasattr(self.acc,
-                                                            'last_mot_events'):
-            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
-        else:
-            events = None
-        return events
-
-    def eval_file(self, filename):
-        self.reset_accumulator()
-
-        result_frame_dict = read_mot_results(filename, is_gt=False)
-        frames = sorted(list(set(result_frame_dict.keys())))
-        for frame_id in frames:
-            trk_objs = result_frame_dict.get(frame_id, [])
-            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
-            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
-
-        return self.acc
-
-    @staticmethod
-    def get_summary(accs,
-                    names,
-                    metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1',
-                             'precision', 'recall')):
-        names = copy.deepcopy(names)
-        if metrics is None:
-            metrics = mm.metrics.motchallenge_metrics
-        metrics = copy.deepcopy(metrics)
-
-        mh = mm.metrics.create()
-        summary = mh.compute_many(
-            accs, metrics=metrics, names=names, generate_overall=True)
-        return summary
-
-    @staticmethod
-    def save_summary(summary, filename):
-        import pandas as pd
-        writer = pd.ExcelWriter(filename)
-        summary.to_excel(writer)
-        writer.save()

+ 2 - 2
paddlex/ppdet/metrics/mot_metrics.py

@@ -539,7 +539,7 @@ class KITTIEvaluation(object):
                         return
                         return
 
 
                 # do not consider objects marked as invalid
                 # do not consider objects marked as invalid
-                if t_data.track_id is -1 and t_data.obj_type != "dontcare":
+                if t_data.track_id == -1 and t_data.obj_type != "dontcare":
                     continue
                     continue
 
 
                 idx = t_data.frame
                 idx = t_data.frame
@@ -718,7 +718,7 @@ class KITTIEvaluation(object):
                     seq_trajectories[gg.track_id].append(-1)
                     seq_trajectories[gg.track_id].append(-1)
                     seq_ignored[gg.track_id].append(False)
                     seq_ignored[gg.track_id].append(False)
 
 
-                if len(g) is 0:
+                if len(g) == 0:
                     cost_matrix = [[]]
                     cost_matrix = [[]]
                 # associate
                 # associate
                 association_matrix = hm.compute(cost_matrix)
                 association_matrix = hm.compute(cost_matrix)

+ 0 - 4
paddlex/ppdet/modeling/architectures/__init__.py

@@ -21,8 +21,6 @@ from . import jde
 from . import deepsort
 from . import deepsort
 from . import fairmot
 from . import fairmot
 from . import centernet
 from . import centernet
-from . import gfl
-from . import picodet
 from . import detr
 from . import detr
 from . import sparse_rcnn
 from . import sparse_rcnn
 
 
@@ -43,7 +41,5 @@ from .deepsort import *
 from .fairmot import *
 from .fairmot import *
 from .centernet import *
 from .centernet import *
 from .blazeface import *
 from .blazeface import *
-from .gfl import *
-from .picodet import *
 from .detr import *
 from .detr import *
 from .sparse_rcnn import *
 from .sparse_rcnn import *

+ 0 - 87
paddlex/ppdet/modeling/architectures/gfl.py

@@ -1,87 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddlex.ppdet.core.workspace import register, create
-from .meta_arch import BaseArch
-
-__all__ = ['GFL']
-
-
-@register
-class GFL(BaseArch):
-    """
-    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
-
-    Args:
-        backbone (object): backbone instance
-        neck (object): 'FPN' instance
-        head (object): 'GFLHead' instance
-    """
-
-    __category__ = 'architecture'
-
-    def __init__(self, backbone, neck, head='GFLHead'):
-        super(GFL, self).__init__()
-        self.backbone = backbone
-        self.neck = neck
-        self.head = head
-
-    @classmethod
-    def from_config(cls, cfg, *args, **kwargs):
-        backbone = create(cfg['backbone'])
-
-        kwargs = {'input_shape': backbone.out_shape}
-        neck = create(cfg['neck'], **kwargs)
-
-        kwargs = {'input_shape': neck.out_shape}
-        head = create(cfg['head'], **kwargs)
-
-        return {
-            'backbone': backbone,
-            'neck': neck,
-            "head": head,
-        }
-
-    def _forward(self):
-        body_feats = self.backbone(self.inputs)
-        fpn_feats = self.neck(body_feats)
-        head_outs = self.head(fpn_feats)
-        if not self.training:
-            im_shape = self.inputs['im_shape']
-            scale_factor = self.inputs['scale_factor']
-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
-                                                      scale_factor)
-            return bboxes, bbox_num
-        else:
-            return head_outs
-
-    def get_loss(self, ):
-        loss = {}
-
-        head_outs = self._forward()
-        loss_gfl = self.head.get_loss(head_outs, self.inputs)
-        loss.update(loss_gfl)
-        total_loss = paddle.add_n(list(loss.values()))
-        loss.update({'loss': total_loss})
-        return loss
-
-    def get_pred(self):
-        bbox_pred, bbox_num = self._forward()
-        output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
-        return output

+ 4 - 5
paddlex/ppdet/modeling/architectures/keypoint_hrnet.py

@@ -41,20 +41,18 @@ class TopDownHRNet(BaseArch):
                  post_process='HRNetPostProcess',
                  post_process='HRNetPostProcess',
                  flip_perm=None,
                  flip_perm=None,
                  flip=True,
                  flip=True,
-                 shift_heatmap=True,
-                 use_dark=True):
+                 shift_heatmap=True):
         """
         """
-        HRNet network, see https://arxiv.org/abs/1902.09212
+        HRNnet network, see https://arxiv.org/abs/1902.09212
 
 
         Args:
         Args:
             backbone (nn.Layer): backbone instance
             backbone (nn.Layer): backbone instance
             post_process (object): `HRNetPostProcess` instance
             post_process (object): `HRNetPostProcess` instance
             flip_perm (list): The left-right joints exchange order list
             flip_perm (list): The left-right joints exchange order list
-            use_dark(bool): Whether to use DARK in post processing
         """
         """
         super(TopDownHRNet, self).__init__()
         super(TopDownHRNet, self).__init__()
         self.backbone = backbone
         self.backbone = backbone
-        self.post_process = HRNetPostProcess(use_dark)
+        self.post_process = HRNetPostProcess()
         self.loss = loss
         self.loss = loss
         self.flip_perm = flip_perm
         self.flip_perm = flip_perm
         self.flip = flip
         self.flip = flip
@@ -220,6 +218,7 @@ class HRNetPostProcess(object):
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
             maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
         """
         """
+
         coords, maxvals = self.get_max_preds(heatmaps)
         coords, maxvals = self.get_max_preds(heatmaps)
 
 
         heatmap_height = heatmaps.shape[2]
         heatmap_height = heatmaps.shape[2]

+ 0 - 91
paddlex/ppdet/modeling/architectures/picodet.py

@@ -1,91 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-from paddlex.ppdet.core.workspace import register, create
-from .meta_arch import BaseArch
-
-__all__ = ['PicoDet']
-
-
-@register
-class PicoDet(BaseArch):
-    """
-    Generalized Focal Loss network, see https://arxiv.org/abs/2006.04388
-
-    Args:
-        backbone (object): backbone instance
-        neck (object): 'FPN' instance
-        head (object): 'PicoHead' instance
-    """
-
-    __category__ = 'architecture'
-
-    def __init__(self, backbone, neck, head='PicoHead'):
-        super(PicoDet, self).__init__()
-        self.backbone = backbone
-        self.neck = neck
-        self.head = head
-        self.deploy = False
-
-    @classmethod
-    def from_config(cls, cfg, *args, **kwargs):
-        backbone = create(cfg['backbone'])
-
-        kwargs = {'input_shape': backbone.out_shape}
-        neck = create(cfg['neck'], **kwargs)
-
-        kwargs = {'input_shape': neck.out_shape}
-        head = create(cfg['head'], **kwargs)
-
-        return {
-            'backbone': backbone,
-            'neck': neck,
-            "head": head,
-        }
-
-    def _forward(self):
-        body_feats = self.backbone(self.inputs)
-        fpn_feats = self.neck(body_feats)
-        head_outs = self.head(fpn_feats)
-        if self.training or self.deploy:
-            return head_outs
-        else:
-            im_shape = self.inputs['im_shape']
-            scale_factor = self.inputs['scale_factor']
-            bboxes, bbox_num = self.head.post_process(head_outs, im_shape,
-                                                      scale_factor)
-            return bboxes, bbox_num
-
-    def get_loss(self, ):
-        loss = {}
-
-        head_outs = self._forward()
-        loss_gfl = self.head.get_loss(head_outs, self.inputs)
-        loss.update(loss_gfl)
-        total_loss = paddle.add_n(list(loss.values()))
-        loss.update({'loss': total_loss})
-        return loss
-
-    def get_pred(self):
-        if self.deploy:
-            return {'picodet': self._forward()[0]}
-        else:
-            bbox_pred, bbox_num = self._forward()
-            output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
-            return output

+ 0 - 4
paddlex/ppdet/modeling/backbones/__init__.py

@@ -18,13 +18,11 @@ from . import darknet
 from . import mobilenet_v1
 from . import mobilenet_v1
 from . import mobilenet_v3
 from . import mobilenet_v3
 from . import hrnet
 from . import hrnet
-from . import lite_hrnet
 from . import blazenet
 from . import blazenet
 from . import ghostnet
 from . import ghostnet
 from . import senet
 from . import senet
 from . import res2net
 from . import res2net
 from . import dla
 from . import dla
-from . import shufflenet_v2
 
 
 from .vgg import *
 from .vgg import *
 from .resnet import *
 from .resnet import *
@@ -32,10 +30,8 @@ from .darknet import *
 from .mobilenet_v1 import *
 from .mobilenet_v1 import *
 from .mobilenet_v3 import *
 from .mobilenet_v3 import *
 from .hrnet import *
 from .hrnet import *
-from .lite_hrnet import *
 from .blazenet import *
 from .blazenet import *
 from .ghostnet import *
 from .ghostnet import *
 from .senet import *
 from .senet import *
 from .res2net import *
 from .res2net import *
 from .dla import *
 from .dla import *
-from .shufflenet_v2 import *

+ 14 - 3
paddlex/ppdet/modeling/backbones/blazenet.py

@@ -55,14 +55,25 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             padding=padding,
             groups=num_groups,
             groups=num_groups,
             weight_attr=ParamAttr(
             weight_attr=ParamAttr(
-                learning_rate=conv_lr, initializer=KaimingNormal()),
+                learning_rate=conv_lr,
+                initializer=KaimingNormal(),
+                name=name + "_weights"),
             bias_attr=False)
             bias_attr=False)
 
 
+        param_attr = ParamAttr(name=name + "_bn_scale")
+        bias_attr = ParamAttr(name=name + "_bn_offset")
         if norm_type == 'sync_bn':
         if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
+            self._batch_norm = nn.SyncBatchNorm(
+                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
         else:
         else:
             self._batch_norm = nn.BatchNorm(
             self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+                out_channels,
+                act=None,
+                param_attr=param_attr,
+                bias_attr=bias_attr,
+                use_global_stats=False,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
 
 
     def forward(self, x):
     def forward(self, x):
         x = self._conv(x)
         x = self._conv(x)

+ 10 - 4
paddlex/ppdet/modeling/backbones/ghostnet.py

@@ -100,15 +100,21 @@ class SEBlock(nn.Layer):
             num_channels,
             num_channels,
             med_ch,
             med_ch,
             weight_attr=ParamAttr(
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
-            bias_attr=ParamAttr(learning_rate=lr_mult))
+                learning_rate=lr_mult,
+                initializer=Uniform(-stdv, stdv),
+                name=name + "_1_weights"),
+            bias_attr=ParamAttr(
+                learning_rate=lr_mult, name=name + "_1_offset"))
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         self.excitation = Linear(
         self.excitation = Linear(
             med_ch,
             med_ch,
             num_channels,
             num_channels,
             weight_attr=ParamAttr(
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, initializer=Uniform(-stdv, stdv)),
-            bias_attr=ParamAttr(learning_rate=lr_mult))
+                learning_rate=lr_mult,
+                initializer=Uniform(-stdv, stdv),
+                name=name + "_2_weights"),
+            bias_attr=ParamAttr(
+                learning_rate=lr_mult, name=name + "_2_offset"))
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         pool = self.pool2d_gap(inputs)
         pool = self.pool2d_gap(inputs)

+ 19 - 7
paddlex/ppdet/modeling/backbones/hrnet.py

@@ -52,23 +52,31 @@ class ConvNormLayer(nn.Layer):
             stride=stride,
             stride=stride,
             padding=(filter_size - 1) // 2,
             padding=(filter_size - 1) // 2,
             groups=1,
             groups=1,
-            weight_attr=ParamAttr(initializer=Normal(
-                mean=0., std=0.01)),
+            weight_attr=ParamAttr(
+                name=name + "_weights", initializer=Normal(
+                    mean=0., std=0.01)),
             bias_attr=False)
             bias_attr=False)
 
 
         norm_lr = 0. if freeze_norm else 1.
         norm_lr = 0. if freeze_norm else 1.
 
 
+        norm_name = name + '_bn'
         param_attr = ParamAttr(
         param_attr = ParamAttr(
-            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            name=norm_name + "_scale",
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
         bias_attr = ParamAttr(
-            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
+            name=norm_name + "_offset",
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay))
         global_stats = True if freeze_norm else False
         global_stats = True if freeze_norm else False
         if norm_type in ['bn', 'sync_bn']:
         if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm(
             self.norm = nn.BatchNorm(
                 ch_out,
                 ch_out,
                 param_attr=param_attr,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
                 bias_attr=bias_attr,
-                use_global_stats=global_stats)
+                use_global_stats=global_stats,
+                moving_mean_name=norm_name + '_mean',
+                moving_variance_name=norm_name + '_variance')
         elif norm_type == 'gn':
         elif norm_type == 'gn':
             self.norm = nn.GroupNorm(
             self.norm = nn.GroupNorm(
                 num_groups=norm_groups,
                 num_groups=norm_groups,
@@ -368,13 +376,17 @@ class SELayer(nn.Layer):
         self.squeeze = Linear(
         self.squeeze = Linear(
             num_channels,
             num_channels,
             med_ch,
             med_ch,
-            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_sqz_weights"),
+            bias_attr=ParamAttr(name=name + '_sqz_offset'))
 
 
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         stdv = 1.0 / math.sqrt(med_ch * 1.0)
         self.excitation = Linear(
         self.excitation = Linear(
             med_ch,
             med_ch,
             num_filters,
             num_filters,
-            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
+            weight_attr=ParamAttr(
+                initializer=Uniform(-stdv, stdv), name=name + "_exc_weights"),
+            bias_attr=ParamAttr(name=name + '_exc_offset'))
 
 
     def forward(self, input):
     def forward(self, input):
         pool = self.pool2d_gap(input)
         pool = self.pool2d_gap(input)

+ 0 - 886
paddlex/ppdet/modeling/backbones/lite_hrnet.py

@@ -1,886 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-
-from numbers import Integral
-from paddle import ParamAttr
-from paddle.regularizer import L2Decay
-from paddle.nn.initializer import Normal, Constant
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.shape_spec import ShapeSpec
-from paddlex.ppdet.modeling.ops import channel_shuffle
-from .. import layers as L
-
-__all__ = ['LiteHRNet']
-
-
-class ConvNormLayer(nn.Layer):
-    def __init__(self,
-                 ch_in,
-                 ch_out,
-                 filter_size,
-                 stride=1,
-                 groups=1,
-                 norm_type=None,
-                 norm_groups=32,
-                 norm_decay=0.,
-                 freeze_norm=False,
-                 act=None):
-        super(ConvNormLayer, self).__init__()
-        self.act = act
-        norm_lr = 0. if freeze_norm else 1.
-        if norm_type is not None:
-            assert (norm_type in [
-                'bn', 'sync_bn', 'gn'
-            ], "norm_type should be one of ['bn', 'sync_bn', 'gn'], but got {}".
-                    format(norm_type))
-            param_attr = ParamAttr(
-                initializer=Constant(1.0),
-                learning_rate=norm_lr,
-                regularizer=L2Decay(norm_decay), )
-            bias_attr = ParamAttr(
-                learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-            global_stats = True if freeze_norm else False
-            if norm_type in ['bn', 'sync_bn']:
-                self.norm = nn.BatchNorm(
-                    ch_out,
-                    param_attr=param_attr,
-                    bias_attr=bias_attr,
-                    use_global_stats=global_stats, )
-            elif norm_type == 'gn':
-                self.norm = nn.GroupNorm(
-                    num_groups=norm_groups,
-                    num_channels=ch_out,
-                    weight_attr=param_attr,
-                    bias_attr=bias_attr)
-            norm_params = self.norm.parameters()
-            if freeze_norm:
-                for param in norm_params:
-                    param.stop_gradient = True
-            conv_bias_attr = False
-        else:
-            conv_bias_attr = True
-            self.norm = None
-
-        self.conv = nn.Conv2D(
-            in_channels=ch_in,
-            out_channels=ch_out,
-            kernel_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            weight_attr=ParamAttr(initializer=Normal(
-                mean=0., std=0.001)),
-            bias_attr=conv_bias_attr)
-
-    def forward(self, inputs):
-        out = self.conv(inputs)
-        if self.norm is not None:
-            out = self.norm(out)
-
-        if self.act == 'relu':
-            out = F.relu(out)
-        elif self.act == 'sigmoid':
-            out = F.sigmoid(out)
-        return out
-
-
-class DepthWiseSeparableConvNormLayer(nn.Layer):
-    def __init__(self,
-                 ch_in,
-                 ch_out,
-                 filter_size,
-                 stride=1,
-                 dw_norm_type=None,
-                 pw_norm_type=None,
-                 norm_decay=0.,
-                 freeze_norm=False,
-                 dw_act=None,
-                 pw_act=None):
-        super(DepthWiseSeparableConvNormLayer, self).__init__()
-        self.depthwise_conv = ConvNormLayer(
-            ch_in=ch_in,
-            ch_out=ch_in,
-            filter_size=filter_size,
-            stride=stride,
-            groups=ch_in,
-            norm_type=dw_norm_type,
-            act=dw_act,
-            norm_decay=norm_decay,
-            freeze_norm=freeze_norm, )
-        self.pointwise_conv = ConvNormLayer(
-            ch_in=ch_in,
-            ch_out=ch_out,
-            filter_size=1,
-            stride=1,
-            norm_type=pw_norm_type,
-            act=pw_act,
-            norm_decay=norm_decay,
-            freeze_norm=freeze_norm, )
-
-    def forward(self, x):
-        x = self.depthwise_conv(x)
-        x = self.pointwise_conv(x)
-        return x
-
-
-class CrossResolutionWeightingModule(nn.Layer):
-    def __init__(self,
-                 channels,
-                 ratio=16,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(CrossResolutionWeightingModule, self).__init__()
-        self.channels = channels
-        total_channel = sum(channels)
-        self.conv1 = ConvNormLayer(
-            ch_in=total_channel,
-            ch_out=total_channel // ratio,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.conv2 = ConvNormLayer(
-            ch_in=total_channel // ratio,
-            ch_out=total_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='sigmoid',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        mini_size = x[-1].shape[-2:]
-        out = [F.adaptive_avg_pool2d(s, mini_size) for s in x[:-1]] + [x[-1]]
-        out = paddle.concat(out, 1)
-        out = self.conv1(out)
-        out = self.conv2(out)
-        out = paddle.split(out, self.channels, 1)
-        out = [
-            s * F.interpolate(
-                a, s.shape[-2:], mode='nearest') for s, a in zip(x, out)
-        ]
-        return out
-
-
-class SpatialWeightingModule(nn.Layer):
-    def __init__(self, in_channel, ratio=16, freeze_norm=False, norm_decay=0.):
-        super(SpatialWeightingModule, self).__init__()
-        self.global_avgpooling = nn.AdaptiveAvgPool2D(1)
-        self.conv1 = ConvNormLayer(
-            ch_in=in_channel,
-            ch_out=in_channel // ratio,
-            filter_size=1,
-            stride=1,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.conv2 = ConvNormLayer(
-            ch_in=in_channel // ratio,
-            ch_out=in_channel,
-            filter_size=1,
-            stride=1,
-            act='sigmoid',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        out = self.global_avgpooling(x)
-        out = self.conv1(out)
-        out = self.conv2(out)
-        return x * out
-
-
-class ConditionalChannelWeightingBlock(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 stride,
-                 reduce_ratio,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(ConditionalChannelWeightingBlock, self).__init__()
-        assert stride in [1, 2]
-        branch_channels = [channel // 2 for channel in in_channels]
-
-        self.cross_resolution_weighting = CrossResolutionWeightingModule(
-            branch_channels,
-            ratio=reduce_ratio,
-            norm_type=norm_type,
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.depthwise_convs = nn.LayerList([
-            ConvNormLayer(
-                channel,
-                channel,
-                filter_size=3,
-                stride=stride,
-                groups=channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay) for channel in branch_channels
-        ])
-
-        self.spatial_weighting = nn.LayerList([
-            SpatialWeightingModule(
-                channel,
-                ratio=4,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay) for channel in branch_channels
-        ])
-
-    def forward(self, x):
-        x = [s.chunk(2, axis=1) for s in x]
-        x1 = [s[0] for s in x]
-        x2 = [s[1] for s in x]
-
-        x2 = self.cross_resolution_weighting(x2)
-        x2 = [dw(s) for s, dw in zip(x2, self.depthwise_convs)]
-        x2 = [sw(s) for s, sw in zip(x2, self.spatial_weighting)]
-
-        out = [paddle.concat([s1, s2], axis=1) for s1, s2 in zip(x1, x2)]
-        out = [channel_shuffle(s, groups=2) for s in out]
-        return out
-
-
-class ShuffleUnit(nn.Layer):
-    def __init__(self,
-                 in_channel,
-                 out_channel,
-                 stride,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(ShuffleUnit, self).__init__()
-        branch_channel = out_channel // 2
-        stride = self.stride
-        if self.stride == 1:
-            assert (
-                in_channel == branch_channel * 2,
-                "when stride=1, in_channel {} should equal to branch_channel*2 {}"
-                .format(in_channel, branch_channel * 2))
-        if stride > 1:
-            self.branch1 = nn.Sequential(
-                ConvNormLayer(
-                    ch_in=in_channel,
-                    ch_out=in_channel,
-                    filter_size=3,
-                    stride=self.stride,
-                    groups=in_channel,
-                    norm_type=norm_type,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay),
-                ConvNormLayer(
-                    ch_in=in_channel,
-                    ch_out=branch_channel,
-                    filter_size=1,
-                    stride=1,
-                    norm_type=norm_type,
-                    act='relu',
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay), )
-        self.branch2 = nn.Sequential(
-            ConvNormLayer(
-                ch_in=branch_channel if stride == 1 else in_channel,
-                ch_out=branch_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=3,
-                stride=self.stride,
-                groups=branch_channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay), )
-
-    def forward(self, x):
-        if self.stride > 1:
-            x1 = self.branch1(x)
-            x2 = self.branch2(x)
-        else:
-            x1, x2 = x.chunk(2, axis=1)
-            x2 = self.branch2(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        out = channel_shuffle(out, groups=2)
-        return out
-
-
-class IterativeHead(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(IterativeHead, self).__init__()
-        num_branches = len(in_channels)
-        self.in_channels = in_channels[::-1]
-
-        projects = []
-        for i in range(num_branches):
-            if i != num_branches - 1:
-                projects.append(
-                    DepthWiseSeparableConvNormLayer(
-                        ch_in=self.in_channels[i],
-                        ch_out=self.in_channels[i + 1],
-                        filter_size=3,
-                        stride=1,
-                        dw_act=None,
-                        pw_act='relu',
-                        dw_norm_type=norm_type,
-                        pw_norm_type=norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-            else:
-                projects.append(
-                    DepthWiseSeparableConvNormLayer(
-                        ch_in=self.in_channels[i],
-                        ch_out=self.in_channels[i],
-                        filter_size=3,
-                        stride=1,
-                        dw_act=None,
-                        pw_act='relu',
-                        dw_norm_type=norm_type,
-                        pw_norm_type=norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-        self.projects = nn.LayerList(projects)
-
-    def forward(self, x):
-        x = x[::-1]
-        y = []
-        last_x = None
-        for i, s in enumerate(x):
-            if last_x is not None:
-                last_x = F.interpolate(
-                    last_x,
-                    size=s.shape[-2:],
-                    mode='bilinear',
-                    align_corners=True)
-                s = s + last_x
-            s = self.projects[i](s)
-            y.append(s)
-            last_x = s
-
-        return y[::-1]
-
-
-class Stem(nn.Layer):
-    def __init__(self,
-                 in_channel,
-                 stem_channel,
-                 out_channel,
-                 expand_ratio,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(Stem, self).__init__()
-        self.conv1 = ConvNormLayer(
-            in_channel,
-            stem_channel,
-            filter_size=3,
-            stride=2,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        mid_channel = int(round(stem_channel * expand_ratio))
-        branch_channel = stem_channel // 2
-        if stem_channel == out_channel:
-            inc_channel = out_channel - branch_channel
-        else:
-            inc_channel = out_channel - stem_channel
-        self.branch1 = nn.Sequential(
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=branch_channel,
-                filter_size=3,
-                stride=2,
-                groups=branch_channel,
-                norm_type=norm_type,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay),
-            ConvNormLayer(
-                ch_in=branch_channel,
-                ch_out=inc_channel,
-                filter_size=1,
-                stride=1,
-                norm_type=norm_type,
-                act='relu',
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay), )
-        self.expand_conv = ConvNormLayer(
-            ch_in=branch_channel,
-            ch_out=mid_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.depthwise_conv = ConvNormLayer(
-            ch_in=mid_channel,
-            ch_out=mid_channel,
-            filter_size=3,
-            stride=2,
-            groups=mid_channel,
-            norm_type=norm_type,
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-        self.linear_conv = ConvNormLayer(
-            ch_in=mid_channel,
-            ch_out=branch_channel
-            if stem_channel == out_channel else stem_channel,
-            filter_size=1,
-            stride=1,
-            norm_type=norm_type,
-            act='relu',
-            freeze_norm=freeze_norm,
-            norm_decay=norm_decay)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x1, x2 = x.chunk(2, axis=1)
-        x1 = self.branch1(x1)
-        x2 = self.expand_conv(x2)
-        x2 = self.depthwise_conv(x2)
-        x2 = self.linear_conv(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        out = channel_shuffle(out, groups=2)
-
-        return out
-
-
-class LiteHRNetModule(nn.Layer):
-    def __init__(self,
-                 num_branches,
-                 num_blocks,
-                 in_channels,
-                 reduce_ratio,
-                 module_type,
-                 multiscale_output=False,
-                 with_fuse=True,
-                 norm_type='bn',
-                 freeze_norm=False,
-                 norm_decay=0.):
-        super(LiteHRNetModule, self).__init__()
-        assert (num_branches == len(in_channels),
-                "num_branches {} should equal to num_in_channels {}"
-                .format(num_branches, len(in_channels)))
-        assert (module_type in ['LITE', 'NAIVE'],
-                "module_type should be one of ['LITE', 'NAIVE']")
-        self.num_branches = num_branches
-        self.in_channels = in_channels
-        self.multiscale_output = multiscale_output
-        self.with_fuse = with_fuse
-        self.norm_type = 'bn'
-        self.module_type = module_type
-
-        if self.module_type == 'LITE':
-            self.layers = self._make_weighting_blocks(
-                num_blocks,
-                reduce_ratio,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay)
-        elif self.module_type == 'NAIVE':
-            self.layers = self._make_naive_branches(
-                num_branches,
-                num_blocks,
-                freeze_norm=freeze_norm,
-                norm_decay=norm_decay)
-
-        if self.with_fuse:
-            self.fuse_layers = self._make_fuse_layers(
-                freeze_norm=freeze_norm, norm_decay=norm_decay)
-            self.relu = nn.ReLU()
-
-    def _make_weighting_blocks(self,
-                               num_blocks,
-                               reduce_ratio,
-                               stride=1,
-                               freeze_norm=False,
-                               norm_decay=0.):
-        layers = []
-        for i in range(num_blocks):
-            layers.append(
-                ConditionalChannelWeightingBlock(
-                    self.in_channels,
-                    stride=stride,
-                    reduce_ratio=reduce_ratio,
-                    norm_type=self.norm_type,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay))
-        return nn.Sequential(*layers)
-
-    def _make_naive_branchs(self,
-                            num_branches,
-                            num_blocks,
-                            freeze_norm=False,
-                            norm_decay=0.):
-        branches = []
-        for branch_idx in range(num_branches):
-            layers = []
-            for i in range(num_blocks):
-                layers.append(
-                    ShuffleUnit(
-                        self.in_channels[branch_idx],
-                        self.in_channels[branch_idx],
-                        stride=1,
-                        norm_type=self.norm_type,
-                        freeze_norm=freeze_norm,
-                        norm_decay=norm_decay))
-            branches.append(nn.Sequential(*layers))
-        return nn.LayerList(branches)
-
-    def _make_fuse_layers(self, freeze_norm=False, norm_decay=0.):
-        if self.num_branches == 1:
-            return None
-        fuse_layers = []
-        num_out_branches = self.num_branches if self.multiscale_output else 1
-        for i in range(num_out_branches):
-            fuse_layer = []
-            for j in range(self.num_branches):
-                if j > i:
-                    fuse_layer.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                self.in_channels[j],
-                                self.in_channels[i],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(self.in_channels[i]),
-                            nn.Upsample(
-                                scale_factor=2**(j - i), mode='nearest')))
-                elif j == i:
-                    fuse_layer.append(None)
-                else:
-                    conv_downsamples = []
-                    for k in range(i - j):
-                        if k == i - j - 1:
-                            conv_downsamples.append(
-                                nn.Sequential(
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=3,
-                                        stride=2,
-                                        padding=1,
-                                        groups=self.in_channels[j],
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[i],
-                                        kernel_size=1,
-                                        stride=1,
-                                        padding=0,
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[i])))
-                        else:
-                            conv_downsamples.append(
-                                nn.Sequential(
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=3,
-                                        stride=2,
-                                        padding=1,
-                                        groups=self.in_channels[j],
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    L.Conv2d(
-                                        self.in_channels[j],
-                                        self.in_channels[j],
-                                        kernel_size=1,
-                                        stride=1,
-                                        padding=0,
-                                        bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
-                                    nn.ReLU()))
-
-                    fuse_layer.append(nn.Sequential(*conv_downsamples))
-            fuse_layers.append(nn.LayerList(fuse_layer))
-
-        return nn.LayerList(fuse_layers)
-
-    def forward(self, x):
-        if self.num_branches == 1:
-            return [self.layers[0](x[0])]
-        if self.module_type == 'LITE':
-            out = self.layers(x)
-        elif self.module_type == 'NAIVE':
-            for i in range(self.num_branches):
-                x[i] = self.layers(x[i])
-            out = x
-        if self.with_fuse:
-            out_fuse = []
-            for i in range(len(self.fuse_layers)):
-                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
-                for j in range(self.num_branches):
-                    if i == j:
-                        y += out[j]
-                    else:
-                        y += self.fuse_layers[i][j](out[j])
-                    if i == 0:
-                        out[i] = y
-                out_fuse.append(self.relu(y))
-            out = out_fuse
-        elif not self.multiscale_output:
-            out = [out[0]]
-        return out
-
-
-@register
-class LiteHRNet(nn.Layer):
-    """
-    @inproceedings{Yulitehrnet21,
-    title={Lite-HRNet: A Lightweight High-Resolution Network},
-        author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
-        booktitle={CVPR},year={2021}
-    }
-    Args:
-        network_type (str): the network_type should be one of ["lite_18", "lite_30", "naive", "wider_naive"],
-            "naive": Simply combining the shuffle block in ShuffleNet and the highresolution design pattern in HRNet.
-            "wider_naive": Naive network with wider channels in each block.
-            "lite_18": Lite-HRNet-18, which replaces the pointwise convolution in a shuffle block by conditional channel weighting.
-            "lite_30": Lite-HRNet-30, with more blocks compared with Lite-HRNet-18.
-        freeze_at (int): the stage to freeze
-        freeze_norm (bool): whether to freeze norm in HRNet
-        norm_decay (float): weight decay for normalization layer weights
-        return_idx (List): the stage to return
-    """
-
-    def __init__(self,
-                 network_type,
-                 freeze_at=0,
-                 freeze_norm=True,
-                 norm_decay=0.,
-                 return_idx=[0, 1, 2, 3]):
-        super(LiteHRNet, self).__init__()
-        if isinstance(return_idx, Integral):
-            return_idx = [return_idx]
-        assert (
-            network_type in ["lite_18", "lite_30", "naive", "wider_naive"],
-            "the network_type should be one of [lite_18, lite_30, naive, wider_naive]"
-        )
-        assert len(return_idx) > 0, "need one or more return index"
-        self.freeze_at = freeze_at
-        self.freeze_norm = freeze_norm
-        self.norm_decay = norm_decay
-        self.return_idx = return_idx
-        self.norm_type = 'bn'
-
-        self.module_configs = {
-            "lite_18": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["LITE", "LITE", "LITE"],
-                "reduce_ratios": [8, 8, 8],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-            "lite_30": {
-                "num_modules": [3, 8, 3],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["LITE", "LITE", "LITE"],
-                "reduce_ratios": [8, 8, 8],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-            "naive": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
-                "reduce_ratios": [1, 1, 1],
-                "num_channels": [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
-            },
-            "wider_naive": {
-                "num_modules": [2, 4, 2],
-                "num_branches": [2, 3, 4],
-                "num_blocks": [2, 2, 2],
-                "module_type": ["NAIVE", "NAIVE", "NAIVE"],
-                "reduce_ratios": [1, 1, 1],
-                "num_channels": [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
-            },
-        }
-
-        self.stages_config = self.module_configs[network_type]
-
-        self.stem = Stem(3, 32, 32, 1)
-        num_channels_pre_layer = [32]
-        for stage_idx in range(3):
-            num_channels = self.stages_config["num_channels"][stage_idx]
-            setattr(self, 'transition{}'.format(stage_idx),
-                    self._make_transition_layer(num_channels_pre_layer,
-                                                num_channels, self.freeze_norm,
-                                                self.norm_decay))
-            stage, num_channels_pre_layer = self._make_stage(
-                self.stages_config, stage_idx, num_channels, True,
-                self.freeze_norm, self.norm_decay)
-            setattr(self, 'stage{}'.format(stage_idx), stage)
-        self.head_layer = IterativeHead(num_channels_pre_layer, 'bn',
-                                        self.freeze_norm, self.norm_decay)
-
-    def _make_transition_layer(self,
-                               num_channels_pre_layer,
-                               num_channels_cur_layer,
-                               freeze_norm=False,
-                               norm_decay=0.):
-        num_branches_pre = len(num_channels_pre_layer)
-        num_branches_cur = len(num_channels_cur_layer)
-        transition_layers = []
-        for i in range(num_branches_cur):
-            if i < num_branches_pre:
-                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
-                    transition_layers.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                num_channels_pre_layer[i],
-                                num_channels_pre_layer[i],
-                                kernel_size=3,
-                                stride=1,
-                                padding=1,
-                                groups=num_channels_pre_layer[i],
-                                bias=False),
-                            nn.BatchNorm(num_channels_pre_layer[i]),
-                            L.Conv2d(
-                                num_channels_pre_layer[i],
-                                num_channels_cur_layer[i],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]),
-                            nn.ReLU()))
-                else:
-                    transition_layers.append(None)
-            else:
-                conv_downsamples = []
-                for j in range(i + 1 - num_branches_pre):
-                    conv_downsamples.append(
-                        nn.Sequential(
-                            L.Conv2d(
-                                num_channels_pre_layer[-1],
-                                num_channels_pre_layer[-1],
-                                groups=num_channels_pre_layer[-1],
-                                kernel_size=3,
-                                stride=2,
-                                padding=1,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_pre_layer[-1]),
-                            L.Conv2d(
-                                num_channels_pre_layer[-1],
-                                num_channels_cur_layer[i]
-                                if j == i - num_branches_pre else
-                                num_channels_pre_layer[-1],
-                                kernel_size=1,
-                                stride=1,
-                                padding=0,
-                                bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]
-                                         if j == i - num_branches_pre else
-                                         num_channels_pre_layer[-1]),
-                            nn.ReLU()))
-                transition_layers.append(nn.Sequential(*conv_downsamples))
-        return nn.LayerList(transition_layers)
-
-    def _make_stage(self,
-                    stages_config,
-                    stage_idx,
-                    in_channels,
-                    multiscale_output,
-                    freeze_norm=False,
-                    norm_decay=0.):
-        num_modules = stages_config["num_modules"][stage_idx]
-        num_branches = stages_config["num_branches"][stage_idx]
-        num_blocks = stages_config["num_blocks"][stage_idx]
-        reduce_ratio = stages_config['reduce_ratios'][stage_idx]
-        module_type = stages_config['module_type'][stage_idx]
-
-        modules = []
-        for i in range(num_modules):
-            if not multiscale_output and i == num_modules - 1:
-                reset_multiscale_output = False
-            else:
-                reset_multiscale_output = True
-            modules.append(
-                LiteHRNetModule(
-                    num_branches,
-                    num_blocks,
-                    in_channels,
-                    reduce_ratio,
-                    module_type,
-                    multiscale_output=reset_multiscale_output,
-                    with_fuse=True,
-                    freeze_norm=freeze_norm,
-                    norm_decay=norm_decay))
-            in_channels = modules[-1].in_channels
-        return nn.Sequential(*modules), in_channels
-
-    def forward(self, inputs):
-        x = inputs['image']
-        x = self.stem(x)
-        y_list = [x]
-        for stage_idx in range(3):
-            x_list = []
-            transition = getattr(self, 'transition{}'.format(stage_idx))
-            for j in range(self.stages_config["num_branches"][stage_idx]):
-                if transition[j] is not None:
-                    if j >= len(y_list):
-                        x_list.append(transition[j](y_list[-1]))
-                    else:
-                        x_list.append(transition[j](y_list[j]))
-                else:
-                    x_list.append(y_list[j])
-            y_list = getattr(self, 'stage{}'.format(stage_idx))(x_list)
-        x = self.head_layer(y_list)
-        res = []
-        for i, layer in enumerate(x):
-            if i == self.freeze_at:
-                layer.stop_gradient = True
-            if i in self.return_idx:
-                res.append(layer)
-        return res
-
-    @property
-    def out_shape(self):
-        return [
-            ShapeSpec(
-                channels=self._out_channels[i], stride=self._out_strides[i])
-            for i in self.return_idx
-        ]

+ 20 - 6
paddlex/ppdet/modeling/backbones/mobilenet_v3.py

@@ -62,17 +62,21 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             groups=num_groups,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_weights"),
             bias_attr=False)
 
         norm_lr = 0. if freeze_norm else lr_mult
         param_attr = ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
+            name=name + "_bn_scale",
             trainable=False if freeze_norm else True)
         bias_attr = ParamAttr(
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
+            name=name + "_bn_offset",
             trainable=False if freeze_norm else True)
         global_stats = True if freeze_norm else False
         if norm_type == 'sync_bn':
@@ -84,7 +88,9 @@ class ConvBNLayer(nn.Layer):
                 act=None,
                 param_attr=param_attr,
                 bias_attr=bias_attr,
-                use_global_stats=global_stats)
+                use_global_stats=global_stats,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
         norm_params = self.bn.parameters()
         if freeze_norm:
             for param in norm_params:
@@ -197,9 +203,13 @@ class SEModule(nn.Layer):
             stride=1,
             padding=0,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_1_weights"),
             bias_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_1_offset"))
         self.conv2 = nn.Conv2D(
             in_channels=mid_channels,
             out_channels=channel,
@@ -207,9 +217,13 @@ class SEModule(nn.Layer):
             stride=1,
             padding=0,
             weight_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)),
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_2_weights"),
             bias_attr=ParamAttr(
-                learning_rate=lr_mult, regularizer=L2Decay(conv_decay)))
+                learning_rate=lr_mult,
+                regularizer=L2Decay(conv_decay),
+                name=name + "_2_offset"))
 
     def forward(self, inputs):
         outputs = self.avg_pool(inputs)

+ 0 - 262
paddlex/ppdet/modeling/backbones/shufflenet_v2.py

@@ -1,262 +0,0 @@
-# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import paddle
-import paddle.nn as nn
-from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
-from paddle.nn.initializer import KaimingNormal
-
-from paddlex.ppdet.core.workspace import register, serializable
-from numbers import Integral
-from ..shape_spec import ShapeSpec
-from paddlex.ppdet.modeling.ops import channel_shuffle
-
-__all__ = ['ShuffleNetV2']
-
-
-class ConvBNLayer(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 act=None):
-        super(ConvBNLayer, self).__init__()
-        self._conv = Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            groups=groups,
-            weight_attr=ParamAttr(initializer=KaimingNormal()),
-            bias_attr=False)
-
-        self._batch_norm = BatchNorm(out_channels, act=act)
-
-    def forward(self, inputs):
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        return y
-
-
-class InvertedResidual(nn.Layer):
-    def __init__(self, in_channels, out_channels, stride, act="relu"):
-        super(InvertedResidual, self).__init__()
-        self._conv_pw = ConvBNLayer(
-            in_channels=in_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        self._conv_dw = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=out_channels // 2,
-            act=None)
-        self._conv_linear = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-
-    def forward(self, inputs):
-        x1, x2 = paddle.split(
-            inputs,
-            num_or_sections=[inputs.shape[1] // 2, inputs.shape[1] // 2],
-            axis=1)
-        x2 = self._conv_pw(x2)
-        x2 = self._conv_dw(x2)
-        x2 = self._conv_linear(x2)
-        out = paddle.concat([x1, x2], axis=1)
-        return channel_shuffle(out, 2)
-
-
-class InvertedResidualDS(nn.Layer):
-    def __init__(self, in_channels, out_channels, stride, act="relu"):
-        super(InvertedResidualDS, self).__init__()
-
-        # branch1
-        self._conv_dw_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=in_channels,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=in_channels,
-            act=None)
-        self._conv_linear_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        # branch2
-        self._conv_pw_2 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-        self._conv_dw_2 = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=3,
-            stride=stride,
-            padding=1,
-            groups=out_channels // 2,
-            act=None)
-        self._conv_linear_2 = ConvBNLayer(
-            in_channels=out_channels // 2,
-            out_channels=out_channels // 2,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            groups=1,
-            act=act)
-
-    def forward(self, inputs):
-        x1 = self._conv_dw_1(inputs)
-        x1 = self._conv_linear_1(x1)
-        x2 = self._conv_pw_2(inputs)
-        x2 = self._conv_dw_2(x2)
-        x2 = self._conv_linear_2(x2)
-        out = paddle.concat([x1, x2], axis=1)
-
-        return channel_shuffle(out, 2)
-
-
-@register
-@serializable
-class ShuffleNetV2(nn.Layer):
-    def __init__(self,
-                 scale=1.0,
-                 act="relu",
-                 feature_maps=[5, 13, 17],
-                 with_last_conv=False):
-        super(ShuffleNetV2, self).__init__()
-        self.scale = scale
-        self.with_last_conv = with_last_conv
-        if isinstance(feature_maps, Integral):
-            feature_maps = [feature_maps]
-        self.feature_maps = feature_maps
-        stage_repeats = [4, 8, 4]
-
-        if scale == 0.25:
-            stage_out_channels = [-1, 24, 24, 48, 96, 512]
-        elif scale == 0.33:
-            stage_out_channels = [-1, 24, 32, 64, 128, 512]
-        elif scale == 0.5:
-            stage_out_channels = [-1, 24, 48, 96, 192, 1024]
-        elif scale == 1.0:
-            stage_out_channels = [-1, 24, 116, 232, 464, 1024]
-        elif scale == 1.5:
-            stage_out_channels = [-1, 24, 176, 352, 704, 1024]
-        elif scale == 2.0:
-            stage_out_channels = [-1, 24, 224, 488, 976, 2048]
-        else:
-            raise NotImplementedError("This scale size:[" + str(scale) +
-                                      "] is not implemented!")
-
-        self._out_channels = []
-        self._feature_idx = 0
-        # 1. conv1
-        self._conv1 = ConvBNLayer(
-            in_channels=3,
-            out_channels=stage_out_channels[1],
-            kernel_size=3,
-            stride=2,
-            padding=1,
-            act=act)
-        self._max_pool = MaxPool2D(kernel_size=3, stride=2, padding=1)
-        self._feature_idx += 1
-
-        # 2. bottleneck sequences
-        self._block_list = []
-        for stage_id, num_repeat in enumerate(stage_repeats):
-            for i in range(num_repeat):
-                if i == 0:
-                    block = self.add_sublayer(
-                        name=str(stage_id + 2) + '_' + str(i + 1),
-                        sublayer=InvertedResidualDS(
-                            in_channels=stage_out_channels[stage_id + 1],
-                            out_channels=stage_out_channels[stage_id + 2],
-                            stride=2,
-                            act=act))
-                else:
-                    block = self.add_sublayer(
-                        name=str(stage_id + 2) + '_' + str(i + 1),
-                        sublayer=InvertedResidual(
-                            in_channels=stage_out_channels[stage_id + 2],
-                            out_channels=stage_out_channels[stage_id + 2],
-                            stride=1,
-                            act=act))
-                self._block_list.append(block)
-                self._feature_idx += 1
-                self._update_out_channels(stage_out_channels[stage_id + 2],
-                                          self._feature_idx, self.feature_maps)
-
-        if self.with_last_conv:
-            # last_conv
-            self._last_conv = ConvBNLayer(
-                in_channels=stage_out_channels[-2],
-                out_channels=stage_out_channels[-1],
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                act=act)
-            self._feature_idx += 1
-            self._update_out_channels(stage_out_channels[-1],
-                                      self._feature_idx, self.feature_maps)
-
-    def _update_out_channels(self, channel, feature_idx, feature_maps):
-        if feature_idx in feature_maps:
-            self._out_channels.append(channel)
-
-    def forward(self, inputs):
-        y = self._conv1(inputs['image'])
-        y = self._max_pool(y)
-        outs = []
-        for i, inv in enumerate(self._block_list):
-            y = inv(y)
-            if i + 2 in self.feature_maps:
-                outs.append(y)
-
-        if self.with_last_conv:
-            y = self._last_conv(y)
-            outs.append(y)
-        return outs
-
-    @property
-    def out_shape(self):
-        return [ShapeSpec(channels=c) for c in self._out_channels]

+ 7 - 2
paddlex/ppdet/modeling/backbones/vgg.py

@@ -30,7 +30,9 @@ class ConvBlock(nn.Layer):
             out_channels=out_channels,
             out_channels=out_channels,
             kernel_size=3,
             kernel_size=3,
             stride=1,
             stride=1,
-            padding=1)
+            padding=1,
+            weight_attr=ParamAttr(name=name + "1_weights"),
+            bias_attr=ParamAttr(name=name + "1_bias"))
         self.conv_out_list = []
         self.conv_out_list = []
         for i in range(1, groups):
         for i in range(1, groups):
             conv_out = self.add_sublayer(
             conv_out = self.add_sublayer(
@@ -40,7 +42,10 @@ class ConvBlock(nn.Layer):
                     out_channels=out_channels,
                     out_channels=out_channels,
                     kernel_size=3,
                     kernel_size=3,
                     stride=1,
                     stride=1,
-                    padding=1))
+                    padding=1,
+                    weight_attr=ParamAttr(
+                        name=name + "{}_weights".format(i + 1)),
+                    bias_attr=ParamAttr(name=name + "{}_bias".format(i + 1))))
             self.conv_out_list.append(conv_out)
             self.conv_out_list.append(conv_out)
 
 
         self.pool = MaxPool2D(
         self.pool = MaxPool2D(

+ 1 - 45
paddlex/ppdet/modeling/bbox_utils.py

@@ -100,7 +100,7 @@ def clip_bbox(boxes, im_shape):
 def nonempty_bbox(boxes, min_size=0, return_mask=False):
 def nonempty_bbox(boxes, min_size=0, return_mask=False):
     w = boxes[:, 2] - boxes[:, 0]
     w = boxes[:, 2] - boxes[:, 0]
     h = boxes[:, 3] - boxes[:, 1]
     h = boxes[:, 3] - boxes[:, 1]
-    mask = paddle.logical_and(h > min_size, w > min_size)
+    mask = paddle.logical_and(w > min_size, w > min_size)
     if return_mask:
     if return_mask:
         return mask
         return mask
     keep = paddle.nonzero(mask).flatten()
     keep = paddle.nonzero(mask).flatten()
@@ -604,47 +604,3 @@ def bbox_iou_np_expand(box1, box2, x1y1x2y2=True, eps=1e-16):
 
 
     ious = inter_area / (b1_area + b2_area - inter_area + eps)
     ious = inter_area / (b1_area + b2_area - inter_area + eps)
     return ious
     return ious
-
-
-def bbox2distance(points, bbox, max_dis=None, eps=0.1):
-    """Decode bounding box based on distances.
-    Args:
-        points (Tensor): Shape (n, 2), [x, y].
-        bbox (Tensor): Shape (n, 4), "xyxy" format
-        max_dis (float): Upper bound of the distance.
-        eps (float): a small value to ensure target < max_dis, instead <=
-    Returns:
-        Tensor: Decoded distances.
-    """
-    left = points[:, 0] - bbox[:, 0]
-    top = points[:, 1] - bbox[:, 1]
-    right = bbox[:, 2] - points[:, 0]
-    bottom = bbox[:, 3] - points[:, 1]
-    if max_dis is not None:
-        left = left.clip(min=0, max=max_dis - eps)
-        top = top.clip(min=0, max=max_dis - eps)
-        right = right.clip(min=0, max=max_dis - eps)
-        bottom = bottom.clip(min=0, max=max_dis - eps)
-    return paddle.stack([left, top, right, bottom], -1)
-
-
-def distance2bbox(points, distance, max_shape=None):
-    """Decode distance prediction to bounding box.
-        Args:
-            points (Tensor): Shape (n, 2), [x, y].
-            distance (Tensor): Distance from the given point to 4
-                boundaries (left, top, right, bottom).
-            max_shape (tuple): Shape of the image.
-        Returns:
-            Tensor: Decoded bboxes.
-        """
-    x1 = points[:, 0] - distance[:, 0]
-    y1 = points[:, 1] - distance[:, 1]
-    x2 = points[:, 0] + distance[:, 2]
-    y2 = points[:, 1] + distance[:, 3]
-    if max_shape is not None:
-        x1 = x1.clip(min=0, max=max_shape[1])
-        y1 = y1.clip(min=0, max=max_shape[0])
-        x2 = x2.clip(min=0, max=max_shape[1])
-        y2 = y2.clip(min=0, max=max_shape[0])
-    return paddle.stack([x1, y1, x2, y2], -1)

+ 0 - 4
paddlex/ppdet/modeling/heads/__init__.py

@@ -25,8 +25,6 @@ from . import face_head
 from . import s2anet_head
 from . import s2anet_head
 from . import keypoint_hrhrnet_head
 from . import keypoint_hrhrnet_head
 from . import centernet_head
 from . import centernet_head
-from . import gfl_head
-from . import pico_head
 from . import detr_head
 from . import detr_head
 from . import sparsercnn_head
 from . import sparsercnn_head
 
 
@@ -43,7 +41,5 @@ from .face_head import *
 from .s2anet_head import *
 from .s2anet_head import *
 from .keypoint_hrhrnet_head import *
 from .keypoint_hrhrnet_head import *
 from .centernet_head import *
 from .centernet_head import *
-from .gfl_head import *
-from .pico_head import *
 from .detr_head import *
 from .detr_head import *
 from .sparsercnn_head import *
 from .sparsercnn_head import *

+ 2 - 1
paddlex/ppdet/modeling/heads/centernet_head.py

@@ -98,7 +98,8 @@ class CenterNetHead(nn.Layer):
                 stride=1,
                 stride=1,
                 padding=0,
                 padding=0,
                 bias=True))
                 bias=True))
-        self.heatmap[2].conv.bias[:] = -2.19
+        with paddle.no_grad():
+            self.heatmap[2].conv.bias[:] = -2.19
         self.size = nn.Sequential(
         self.size = nn.Sequential(
             ConvLayer(
             ConvLayer(
                 in_channels, head_planes, kernel_size=3, padding=1, bias=True),
                 in_channels, head_planes, kernel_size=3, padding=1, bias=True),

+ 5 - 3
paddlex/ppdet/modeling/heads/detr_head.py

@@ -311,9 +311,11 @@ class DeformableDETRHead(nn.Layer):
         linear_init_(self.score_head)
         linear_init_(self.score_head)
         constant_(self.score_head.bias, -4.595)
         constant_(self.score_head.bias, -4.595)
         constant_(self.bbox_head.layers[-1].weight)
         constant_(self.bbox_head.layers[-1].weight)
-        bias = paddle.zeros_like(self.bbox_head.layers[-1].bias)
-        bias[2:] = -2.0
-        self.bbox_head.layers[-1].bias.set_value(bias)
+
+        with paddle.no_grad():
+            bias = paddle.zeros_like(self.bbox_head.layers[-1].bias)
+            bias[2:] = -2.0
+            self.bbox_head.layers[-1].bias.set_value(bias)
 
 
     @classmethod
     @classmethod
     def from_config(cls, cfg, hidden_dim, nhead, input_shape):
     def from_config(cls, cfg, hidden_dim, nhead, input_shape):

+ 19 - 8
paddlex/ppdet/modeling/heads/fcos_head.py

@@ -151,9 +151,12 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 kernel_size=3,
                 stride=1,
                 stride=1,
                 padding=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
+                weight_attr=ParamAttr(
+                    name=conv_cls_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
                 bias_attr=ParamAttr(
                 bias_attr=ParamAttr(
+                    name=conv_cls_name + "_bias",
                     initializer=Constant(value=bias_init_value))))
                     initializer=Constant(value=bias_init_value))))
 
 
         conv_reg_name = "fcos_head_reg"
         conv_reg_name = "fcos_head_reg"
@@ -165,9 +168,13 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 kernel_size=3,
                 stride=1,
                 stride=1,
                 padding=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
+                weight_attr=ParamAttr(
+                    name=conv_reg_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
+                bias_attr=ParamAttr(
+                    name=conv_reg_name + "_bias",
+                    initializer=Constant(value=0))))
 
 
         conv_centerness_name = "fcos_head_centerness"
         conv_centerness_name = "fcos_head_centerness"
         self.fcos_head_centerness = self.add_sublayer(
         self.fcos_head_centerness = self.add_sublayer(
@@ -178,9 +185,13 @@ class FCOSHead(nn.Layer):
                 kernel_size=3,
                 kernel_size=3,
                 stride=1,
                 stride=1,
                 padding=1,
                 padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
+                weight_attr=ParamAttr(
+                    name=conv_centerness_name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.01)),
+                bias_attr=ParamAttr(
+                    name=conv_centerness_name + "_bias",
+                    initializer=Constant(value=0))))
 
 
         self.scales_regs = []
         self.scales_regs = []
         for i in range(len(self.fpn_stride)):
         for i in range(len(self.fpn_stride)):

+ 0 - 476
paddlex/ppdet/modeling/heads/gfl_head.py

@@ -1,476 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Normal, Constant
-
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from paddlex.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
-from paddlex.ppdet.data.transform.atss_assigner import bbox_overlaps
-
-
-class ScaleReg(nn.Layer):
-    """
-    Parameter for scaling the regression outputs.
-    """
-
-    def __init__(self):
-        super(ScaleReg, self).__init__()
-        self.scale_reg = self.create_parameter(
-            shape=[1],
-            attr=ParamAttr(initializer=Constant(value=1.)),
-            dtype="float32")
-
-    def forward(self, inputs):
-        out = inputs * self.scale_reg
-        return out
-
-
-class Integral(nn.Layer):
-    """A fixed layer for calculating integral result from distribution.
-    This layer calculates the target location by :math: `sum{P(y_i) * y_i}`,
-    P(y_i) denotes the softmax vector that represents the discrete distribution
-    y_i denotes the discrete set, usually {0, 1, 2, ..., reg_max}
-
-    Args:
-        reg_max (int): The maximal value of the discrete set. Default: 16. You
-            may want to reset it according to your new dataset or related
-            settings.
-    """
-
-    def __init__(self, reg_max=16):
-        super(Integral, self).__init__()
-        self.reg_max = reg_max
-        self.register_buffer(
-            'project', paddle.linspace(0, self.reg_max, self.reg_max + 1))
-
-    def forward(self, x):
-        """Forward feature from the regression head to get integral result of
-        bounding box location.
-        Args:
-            x (Tensor): Features of the regression head, shape (N, 4*(n+1)),
-                n is self.reg_max.
-        Returns:
-            x (Tensor): Integral result of box locations, i.e., distance
-                offsets from the box center in four directions, shape (N, 4).
-        """
-        x = F.softmax(x.reshape([-1, self.reg_max + 1]), axis=1)
-        x = F.linear(x, self.project).reshape([-1, 4])
-        return x
-
-
-@register
-class DGQP(nn.Layer):
-    """Distribution-Guided Quality Predictor of GFocal head
-
-    Args:
-        reg_topk (int): top-k statistics of distribution to guide LQE
-        reg_channels (int): hidden layer unit to generate LQE
-        add_mean (bool): Whether to calculate the mean of top-k statistics
-    """
-
-    def __init__(self, reg_topk=4, reg_channels=64, add_mean=True):
-        super(DGQP, self).__init__()
-        self.reg_topk = reg_topk
-        self.reg_channels = reg_channels
-        self.add_mean = add_mean
-        self.total_dim = reg_topk
-        if add_mean:
-            self.total_dim += 1
-        self.reg_conv1 = self.add_sublayer(
-            'dgqp_reg_conv1',
-            nn.Conv2D(
-                in_channels=4 * self.total_dim,
-                out_channels=self.reg_channels,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-        self.reg_conv2 = self.add_sublayer(
-            'dgqp_reg_conv2',
-            nn.Conv2D(
-                in_channels=self.reg_channels,
-                out_channels=1,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-
-    def forward(self, x):
-        """Forward feature from the regression head to get integral result of
-        bounding box location.
-        Args:
-            x (Tensor): Features of the regression head, shape (N, 4*(n+1)),
-                n is self.reg_max.
-        Returns:
-            x (Tensor): Integral result of box locations, i.e., distance
-                offsets from the box center in four directions, shape (N, 4).
-        """
-        N, _, H, W = x.shape[:]
-        prob = F.softmax(x.reshape([N, 4, -1, H, W]), axis=2)
-        prob_topk, _ = prob.topk(self.reg_topk, axis=2)
-        if self.add_mean:
-            stat = paddle.concat(
-                [prob_topk, prob_topk.mean(
-                    axis=2, keepdim=True)], axis=2)
-        else:
-            stat = prob_topk
-        y = F.relu(self.reg_conv1(stat.reshape([N, -1, H, W])))
-        y = F.sigmoid(self.reg_conv2(y))
-        return y
-
-
-@register
-class GFLHead(nn.Layer):
-    """
-    GFLHead
-    Args:
-        conv_feat (object): Instance of 'FCOSFeat'
-        num_classes (int): Number of classes
-        fpn_stride (list): The stride of each FPN Layer
-        prior_prob (float): Used to set the bias init for the class prediction layer
-        loss_qfl (object):
-        loss_dfl (object):
-        loss_bbox (object):
-        reg_max: Max value of integral set :math: `{0, ..., reg_max}`
-                n QFL setting. Default: 16.
-    """
-    __inject__ = [
-        'conv_feat', 'dgqp_module', 'loss_qfl', 'loss_dfl', 'loss_bbox', 'nms'
-    ]
-    __shared__ = ['num_classes']
-
-    def __init__(self,
-                 conv_feat='FCOSFeat',
-                 dgqp_module=None,
-                 num_classes=80,
-                 fpn_stride=[8, 16, 32, 64, 128],
-                 prior_prob=0.01,
-                 loss_qfl='QualityFocalLoss',
-                 loss_dfl='DistributionFocalLoss',
-                 loss_bbox='GIoULoss',
-                 reg_max=16,
-                 feat_in_chan=256,
-                 nms=None,
-                 nms_pre=1000,
-                 cell_offset=0):
-        super(GFLHead, self).__init__()
-        self.conv_feat = conv_feat
-        self.dgqp_module = dgqp_module
-        self.num_classes = num_classes
-        self.fpn_stride = fpn_stride
-        self.prior_prob = prior_prob
-        self.loss_qfl = loss_qfl
-        self.loss_dfl = loss_dfl
-        self.loss_bbox = loss_bbox
-        self.reg_max = reg_max
-        self.feat_in_chan = feat_in_chan
-        self.nms = nms
-        self.nms_pre = nms_pre
-        self.cell_offset = cell_offset
-        self.use_sigmoid = self.loss_qfl.use_sigmoid
-        if self.use_sigmoid:
-            self.cls_out_channels = self.num_classes
-        else:
-            self.cls_out_channels = self.num_classes + 1
-
-        conv_cls_name = "gfl_head_cls"
-        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
-        self.gfl_head_cls = self.add_sublayer(
-            conv_cls_name,
-            nn.Conv2D(
-                in_channels=self.feat_in_chan,
-                out_channels=self.cls_out_channels,
-                kernel_size=3,
-                stride=1,
-                padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(
-                    initializer=Constant(value=bias_init_value))))
-
-        conv_reg_name = "gfl_head_reg"
-        self.gfl_head_reg = self.add_sublayer(
-            conv_reg_name,
-            nn.Conv2D(
-                in_channels=self.feat_in_chan,
-                out_channels=4 * (self.reg_max + 1),
-                kernel_size=3,
-                stride=1,
-                padding=1,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.01)),
-                bias_attr=ParamAttr(initializer=Constant(value=0))))
-
-        self.scales_regs = []
-        for i in range(len(self.fpn_stride)):
-            lvl = int(math.log(int(self.fpn_stride[i]), 2))
-            feat_name = 'p{}_feat'.format(lvl)
-            scale_reg = self.add_sublayer(feat_name, ScaleReg())
-            self.scales_regs.append(scale_reg)
-
-        self.distribution_project = Integral(self.reg_max)
-
-    def forward(self, fpn_feats):
-        assert len(fpn_feats) == len(
-            self.fpn_stride
-        ), "The size of fpn_feats is not equal to size of fpn_stride"
-        cls_logits_list = []
-        bboxes_reg_list = []
-        for scale_reg, fpn_feat in zip(self.scales_regs, fpn_feats):
-            conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat)
-            cls_logits = self.gfl_head_cls(conv_cls_feat)
-            bbox_reg = scale_reg(self.gfl_head_reg(conv_reg_feat))
-            if self.dgqp_module:
-                quality_score = self.dgqp_module(bbox_reg)
-                cls_logits = F.sigmoid(cls_logits) * quality_score
-            cls_logits_list.append(cls_logits)
-            bboxes_reg_list.append(bbox_reg)
-
-        return (cls_logits_list, bboxes_reg_list)
-
-    def _images_to_levels(self, target, num_level_anchors):
-        """
-        Convert targets by image to targets by feature level.
-        """
-        level_targets = []
-        start = 0
-        for n in num_level_anchors:
-            end = start + n
-            level_targets.append(target[:, start:end].squeeze(0))
-            start = end
-        return level_targets
-
-    def _grid_cells_to_center(self, grid_cells):
-        """
-        Get center location of each gird cell
-        Args:
-            grid_cells: grid cells of a feature map
-        Returns:
-            center points
-        """
-        cells_cx = (grid_cells[:, 2] + grid_cells[:, 0]) / 2
-        cells_cy = (grid_cells[:, 3] + grid_cells[:, 1]) / 2
-        return paddle.stack([cells_cx, cells_cy], axis=-1)
-
-    def get_loss(self, gfl_head_outs, gt_meta):
-        cls_logits, bboxes_reg = gfl_head_outs
-        num_level_anchors = [
-            featmap.shape[-2] * featmap.shape[-1] for featmap in cls_logits
-        ]
-        grid_cells_list = self._images_to_levels(gt_meta['grid_cells'],
-                                                 num_level_anchors)
-        labels_list = self._images_to_levels(gt_meta['labels'],
-                                             num_level_anchors)
-        label_weights_list = self._images_to_levels(gt_meta['label_weights'],
-                                                    num_level_anchors)
-        bbox_targets_list = self._images_to_levels(gt_meta['bbox_targets'],
-                                                   num_level_anchors)
-        num_total_pos = sum(gt_meta['pos_num'])
-
-        loss_bbox_list, loss_dfl_list, loss_qfl_list, avg_factor = [], [], [], []
-        for cls_score, bbox_pred, grid_cells, labels, label_weights, bbox_targets, stride in zip(
-                cls_logits, bboxes_reg, grid_cells_list, labels_list,
-                label_weights_list, bbox_targets_list, self.fpn_stride):
-            grid_cells = grid_cells.reshape([-1, 4])
-            cls_score = cls_score.transpose([0, 2, 3, 1]).reshape(
-                [-1, self.cls_out_channels])
-            bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
-                [-1, 4 * (self.reg_max + 1)])
-            bbox_targets = bbox_targets.reshape([-1, 4])
-            labels = labels.reshape([-1])
-            label_weights = label_weights.reshape([-1])
-
-            bg_class_ind = self.num_classes
-            pos_inds = paddle.nonzero(
-                paddle.logical_and((labels >= 0), (labels < bg_class_ind)),
-                as_tuple=False).squeeze(1)
-            score = np.zeros(labels.shape)
-            if len(pos_inds) > 0:
-                pos_bbox_targets = paddle.gather(
-                    bbox_targets, pos_inds, axis=0)
-                pos_bbox_pred = paddle.gather(bbox_pred, pos_inds, axis=0)
-                pos_grid_cells = paddle.gather(grid_cells, pos_inds, axis=0)
-                pos_grid_cell_centers = self._grid_cells_to_center(
-                    pos_grid_cells) / stride
-
-                weight_targets = F.sigmoid(cls_score.detach())
-                weight_targets = paddle.gather(
-                    weight_targets.max(axis=1), pos_inds, axis=0)
-                pos_bbox_pred_corners = self.distribution_project(
-                    pos_bbox_pred)
-                pos_decode_bbox_pred = distance2bbox(pos_grid_cell_centers,
-                                                     pos_bbox_pred_corners)
-                pos_decode_bbox_targets = pos_bbox_targets / stride
-                bbox_iou = bbox_overlaps(
-                    pos_decode_bbox_pred.detach().numpy(),
-                    pos_decode_bbox_targets.detach().numpy(),
-                    is_aligned=True)
-                score[pos_inds.numpy()] = bbox_iou
-                pred_corners = pos_bbox_pred.reshape([-1, self.reg_max + 1])
-                target_corners = bbox2distance(pos_grid_cell_centers,
-                                               pos_decode_bbox_targets,
-                                               self.reg_max).reshape([-1])
-                # regression loss
-                loss_bbox = paddle.sum(
-                    self.loss_bbox(pos_decode_bbox_pred,
-                                   pos_decode_bbox_targets) *
-                    weight_targets.mean(axis=-1))
-
-                # dfl loss
-                loss_dfl = self.loss_dfl(
-                    pred_corners,
-                    target_corners,
-                    weight=weight_targets.unsqueeze(-1).expand(
-                        [-1, 4]).reshape([-1]),
-                    avg_factor=4.0)
-            else:
-                loss_bbox = bbox_pred.sum() * 0
-                loss_dfl = bbox_pred.sum() * 0
-                weight_targets = paddle.to_tensor([0])
-
-            # qfl loss
-            score = paddle.to_tensor(score)
-            loss_qfl = self.loss_qfl(
-                cls_score, (labels, score),
-                weight=label_weights,
-                avg_factor=num_total_pos)
-            loss_bbox_list.append(loss_bbox)
-            loss_dfl_list.append(loss_dfl)
-            loss_qfl_list.append(loss_qfl)
-            avg_factor.append(weight_targets.sum())
-
-        avg_factor = sum(avg_factor)
-        if avg_factor <= 0:
-            loss_qfl = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-            loss_bbox = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-            loss_dfl = paddle.to_tensor(
-                0, dtype='float32', stop_gradient=False)
-        else:
-            losses_bbox = list(map(lambda x: x / avg_factor, loss_bbox_list))
-            losses_dfl = list(map(lambda x: x / avg_factor, loss_dfl_list))
-            loss_qfl = sum(loss_qfl_list)
-            loss_bbox = sum(losses_bbox)
-            loss_dfl = sum(losses_dfl)
-
-        loss_states = dict(
-            loss_qfl=loss_qfl, loss_bbox=loss_bbox, loss_dfl=loss_dfl)
-
-        return loss_states
-
-    def get_single_level_center_point(self,
-                                      featmap_size,
-                                      stride,
-                                      cell_offset=0):
-        """
-        Generate pixel centers of a single stage feature map.
-        Args:
-            featmap_size: height and width of the feature map
-            stride: down sample stride of the feature map
-        Returns:
-            y and x of the center points
-        """
-        h, w = featmap_size
-        x_range = (paddle.arange(w, dtype='float32') + cell_offset) * stride
-        y_range = (paddle.arange(h, dtype='float32') + cell_offset) * stride
-        y, x = paddle.meshgrid(y_range, x_range)
-        y = y.flatten()
-        x = x.flatten()
-        return y, x
-
-    def get_bboxes_single(self,
-                          cls_scores,
-                          bbox_preds,
-                          img_shape,
-                          scale_factor,
-                          rescale=True,
-                          cell_offset=0):
-        assert len(cls_scores) == len(bbox_preds)
-        mlvl_bboxes = []
-        mlvl_scores = []
-        for stride, cls_score, bbox_pred in zip(self.fpn_stride, cls_scores,
-                                                bbox_preds):
-            featmap_size = cls_score.shape[-2:]
-            y, x = self.get_single_level_center_point(
-                featmap_size, stride, cell_offset=cell_offset)
-            center_points = paddle.stack([x, y], axis=-1)
-            scores = F.sigmoid(
-                cls_score.transpose([1, 2, 0]).reshape(
-                    [-1, self.cls_out_channels]))
-            bbox_pred = bbox_pred.transpose([1, 2, 0])
-            bbox_pred = self.distribution_project(bbox_pred) * stride
-
-            if scores.shape[0] > self.nms_pre:
-                max_scores = scores.max(axis=1)
-                _, topk_inds = max_scores.topk(self.nms_pre)
-                center_points = center_points.gather(topk_inds)
-                bbox_pred = bbox_pred.gather(topk_inds)
-                scores = scores.gather(topk_inds)
-
-            bboxes = distance2bbox(
-                center_points, bbox_pred, max_shape=img_shape)
-            mlvl_bboxes.append(bboxes)
-            mlvl_scores.append(scores)
-        mlvl_bboxes = paddle.concat(mlvl_bboxes)
-        if rescale:
-            # [h_scale, w_scale] to [w_scale, h_scale, w_scale, h_scale]
-            im_scale = paddle.concat([scale_factor[::-1], scale_factor[::-1]])
-            mlvl_bboxes /= im_scale
-        mlvl_scores = paddle.concat(mlvl_scores)
-        if self.use_sigmoid:
-            # add a dummy background class to the backend when use_sigmoid
-            padding = paddle.zeros([mlvl_scores.shape[0], 1])
-            mlvl_scores = paddle.concat([mlvl_scores, padding], axis=1)
-        mlvl_scores = mlvl_scores.transpose([1, 0])
-        return mlvl_bboxes, mlvl_scores
-
-    def decode(self, cls_scores, bbox_preds, im_shape, scale_factor,
-               cell_offset):
-        batch_bboxes = []
-        batch_scores = []
-        for img_id in range(cls_scores[0].shape[0]):
-            num_levels = len(cls_scores)
-            cls_score_list = [cls_scores[i][img_id] for i in range(num_levels)]
-            bbox_pred_list = [bbox_preds[i][img_id] for i in range(num_levels)]
-            bboxes, scores = self.get_bboxes_single(
-                cls_score_list,
-                bbox_pred_list,
-                im_shape[img_id],
-                scale_factor[img_id],
-                cell_offset=cell_offset)
-            batch_bboxes.append(bboxes)
-            batch_scores.append(scores)
-        batch_bboxes = paddle.stack(batch_bboxes, axis=0)
-        batch_scores = paddle.stack(batch_scores, axis=0)
-
-        return batch_bboxes, batch_scores
-
-    def post_process(self, gfl_head_outs, im_shape, scale_factor):
-        cls_scores, bboxes_reg = gfl_head_outs
-        bboxes, score = self.decode(cls_scores, bboxes_reg, im_shape,
-                                    scale_factor, self.cell_offset)
-        bbox_pred, bbox_num, _ = self.nms(bboxes, score)
-        return bbox_pred, bbox_num

+ 0 - 329
paddlex/ppdet/modeling/heads/pico_head.py

@@ -1,329 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Normal, Constant
-
-from paddlex.ppdet.core.workspace import register
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from paddlex.ppdet.modeling.bbox_utils import distance2bbox, bbox2distance
-from paddlex.ppdet.data.transform.atss_assigner import bbox_overlaps
-from .gfl_head import GFLHead
-
-
-@register
-class PicoFeat(nn.Layer):
-    """
-    PicoFeat of PicoDet
-
-    Args:
-        feat_in (int): The channel number of input Tensor.
-        feat_out (int): The channel number of output Tensor.
-        num_convs (int): The convolution number of the LiteGFLFeat.
-        norm_type (str): Normalization type, 'bn'/'sync_bn'/'gn'.
-    """
-
-    def __init__(self,
-                 feat_in=256,
-                 feat_out=96,
-                 num_fpn_stride=3,
-                 num_convs=2,
-                 norm_type='bn',
-                 share_cls_reg=False):
-        super(PicoFeat, self).__init__()
-        self.num_convs = num_convs
-        self.norm_type = norm_type
-        self.share_cls_reg = share_cls_reg
-        self.cls_convs = []
-        self.reg_convs = []
-        for stage_idx in range(num_fpn_stride):
-            cls_subnet_convs = []
-            reg_subnet_convs = []
-            for i in range(self.num_convs):
-                in_c = feat_in if i == 0 else feat_out
-                cls_conv_dw = self.add_sublayer(
-                    'cls_conv_dw{}.{}'.format(stage_idx, i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=feat_out,
-                        filter_size=3,
-                        stride=1,
-                        groups=feat_out,
-                        norm_type=norm_type,
-                        bias_on=False,
-                        lr_scale=2.))
-                cls_subnet_convs.append(cls_conv_dw)
-                cls_conv_pw = self.add_sublayer(
-                    'cls_conv_pw{}.{}'.format(stage_idx, i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=feat_out,
-                        filter_size=1,
-                        stride=1,
-                        norm_type=norm_type,
-                        bias_on=False,
-                        lr_scale=2.))
-                cls_subnet_convs.append(cls_conv_pw)
-
-                if not self.share_cls_reg:
-                    reg_conv_dw = self.add_sublayer(
-                        'reg_conv_dw{}.{}'.format(stage_idx, i),
-                        ConvNormLayer(
-                            ch_in=in_c,
-                            ch_out=feat_out,
-                            filter_size=3,
-                            stride=1,
-                            groups=feat_out,
-                            norm_type=norm_type,
-                            bias_on=False,
-                            lr_scale=2.))
-                    reg_subnet_convs.append(reg_conv_dw)
-                    reg_conv_pw = self.add_sublayer(
-                        'reg_conv_pw{}.{}'.format(stage_idx, i),
-                        ConvNormLayer(
-                            ch_in=in_c,
-                            ch_out=feat_out,
-                            filter_size=1,
-                            stride=1,
-                            norm_type=norm_type,
-                            bias_on=False,
-                            lr_scale=2.))
-                    reg_subnet_convs.append(reg_conv_pw)
-            self.cls_convs.append(cls_subnet_convs)
-            self.reg_convs.append(reg_subnet_convs)
-
-    def forward(self, fpn_feat, stage_idx):
-        assert stage_idx < len(self.cls_convs)
-        cls_feat = fpn_feat
-        reg_feat = fpn_feat
-        for i in range(len(self.cls_convs[stage_idx])):
-            cls_feat = F.leaky_relu(self.cls_convs[stage_idx][i](cls_feat),
-                                    0.1)
-            if not self.share_cls_reg:
-                reg_feat = F.leaky_relu(self.reg_convs[stage_idx][i](reg_feat),
-                                        0.1)
-        return cls_feat, reg_feat
-
-
-@register
-class PicoHead(GFLHead):
-    """
-    PicoHead
-    Args:
-        conv_feat (object): Instance of 'LiteGFLFeat'
-        num_classes (int): Number of classes
-        fpn_stride (list): The stride of each FPN Layer
-        prior_prob (float): Used to set the bias init for the class prediction layer
-        loss_qfl (object):
-        loss_dfl (object):
-        loss_bbox (object):
-        reg_max: Max value of integral set :math: `{0, ..., reg_max}`
-                n QFL setting. Default: 16.
-    """
-    __inject__ = [
-        'conv_feat', 'dgqp_module', 'loss_qfl', 'loss_dfl', 'loss_bbox', 'nms'
-    ]
-    __shared__ = ['num_classes']
-
-    def __init__(self,
-                 conv_feat='PicoFeat',
-                 dgqp_module=None,
-                 num_classes=80,
-                 fpn_stride=[8, 16, 32],
-                 prior_prob=0.01,
-                 loss_qfl='QualityFocalLoss',
-                 loss_dfl='DistributionFocalLoss',
-                 loss_bbox='GIoULoss',
-                 reg_max=16,
-                 feat_in_chan=96,
-                 nms=None,
-                 nms_pre=1000,
-                 cell_offset=0):
-        super(PicoHead, self).__init__(
-            conv_feat=conv_feat,
-            dgqp_module=dgqp_module,
-            num_classes=num_classes,
-            fpn_stride=fpn_stride,
-            prior_prob=prior_prob,
-            loss_qfl=loss_qfl,
-            loss_dfl=loss_dfl,
-            loss_bbox=loss_bbox,
-            reg_max=reg_max,
-            feat_in_chan=feat_in_chan,
-            nms=nms,
-            nms_pre=nms_pre,
-            cell_offset=cell_offset)
-        self.conv_feat = conv_feat
-        self.num_classes = num_classes
-        self.fpn_stride = fpn_stride
-        self.prior_prob = prior_prob
-        self.loss_qfl = loss_qfl
-        self.loss_dfl = loss_dfl
-        self.loss_bbox = loss_bbox
-        self.reg_max = reg_max
-        self.feat_in_chan = feat_in_chan
-        self.nms = nms
-        self.nms_pre = nms_pre
-        self.cell_offset = cell_offset
-        self.use_sigmoid = self.loss_qfl.use_sigmoid
-        if self.use_sigmoid:
-            self.cls_out_channels = self.num_classes
-        else:
-            self.cls_out_channels = self.num_classes + 1
-        bias_init_value = -math.log((1 - self.prior_prob) / self.prior_prob)
-        # Clear the super class initialization
-        self.gfl_head_cls = None
-        self.gfl_head_reg = None
-        self.scales_regs = None
-
-        self.head_cls_list = []
-        self.head_reg_list = []
-        for i in range(len(fpn_stride)):
-            head_cls = self.add_sublayer(
-                "head_cls" + str(i),
-                nn.Conv2D(
-                    in_channels=self.feat_in_chan,
-                    out_channels=self.cls_out_channels + 4 * (self.reg_max + 1)
-                    if self.conv_feat.share_cls_reg else self.cls_out_channels,
-                    kernel_size=1,
-                    stride=1,
-                    padding=0,
-                    weight_attr=ParamAttr(initializer=Normal(
-                        mean=0., std=0.01)),
-                    bias_attr=ParamAttr(
-                        initializer=Constant(value=bias_init_value))))
-            self.head_cls_list.append(head_cls)
-            if not self.conv_feat.share_cls_reg:
-                head_reg = self.add_sublayer(
-                    "head_reg" + str(i),
-                    nn.Conv2D(
-                        in_channels=self.feat_in_chan,
-                        out_channels=4 * (self.reg_max + 1),
-                        kernel_size=1,
-                        stride=1,
-                        padding=0,
-                        weight_attr=ParamAttr(initializer=Normal(
-                            mean=0., std=0.01)),
-                        bias_attr=ParamAttr(initializer=Constant(value=0))))
-                self.head_reg_list.append(head_reg)
-
-    def forward(self, fpn_feats):
-        assert len(fpn_feats) == len(
-            self.fpn_stride
-        ), "The size of fpn_feats is not equal to size of fpn_stride"
-        cls_logits_list = []
-        bboxes_reg_list = []
-        for i, fpn_feat in enumerate(fpn_feats):
-            conv_cls_feat, conv_reg_feat = self.conv_feat(fpn_feat, i)
-            if self.conv_feat.share_cls_reg:
-                cls_logits = self.head_cls_list[i](conv_cls_feat)
-                cls_score, bbox_pred = paddle.split(
-                    cls_logits,
-                    [self.cls_out_channels, 4 * (self.reg_max + 1)],
-                    axis=1)
-            else:
-                cls_score = self.head_cls_list[i](conv_cls_feat)
-                bbox_pred = self.head_reg_list[i](conv_reg_feat)
-            if self.dgqp_module:
-                quality_score = self.dgqp_module(bbox_pred)
-                cls_score = F.sigmoid(cls_score) * quality_score
-
-            if not self.training:
-                cls_score = F.sigmoid(cls_score.transpose([0, 2, 3, 1]))
-                bbox_pred = self.distribution_project(
-                    bbox_pred.transpose([0, 2, 3, 1])) * self.fpn_stride[i]
-
-            cls_logits_list.append(cls_score)
-            bboxes_reg_list.append(bbox_pred)
-
-        return (cls_logits_list, bboxes_reg_list)
-
-    def get_bboxes_single(self,
-                          cls_scores,
-                          bbox_preds,
-                          img_shape,
-                          scale_factor,
-                          rescale=True,
-                          cell_offset=0):
-        assert len(cls_scores) == len(bbox_preds)
-        mlvl_bboxes = []
-        mlvl_scores = []
-        for stride, cls_score, bbox_pred in zip(self.fpn_stride, cls_scores,
-                                                bbox_preds):
-            featmap_size = cls_score.shape[0:2]
-            y, x = self.get_single_level_center_point(
-                featmap_size, stride, cell_offset=cell_offset)
-            center_points = paddle.stack([x, y], axis=-1)
-            scores = cls_score.reshape([-1, self.cls_out_channels])
-
-            if scores.shape[0] > self.nms_pre:
-                max_scores = scores.max(axis=1)
-                _, topk_inds = max_scores.topk(self.nms_pre)
-                center_points = center_points.gather(topk_inds)
-                bbox_pred = bbox_pred.gather(topk_inds)
-                scores = scores.gather(topk_inds)
-
-            bboxes = distance2bbox(
-                center_points, bbox_pred, max_shape=img_shape)
-            mlvl_bboxes.append(bboxes)
-            mlvl_scores.append(scores)
-        mlvl_bboxes = paddle.concat(mlvl_bboxes)
-        if rescale:
-            # [h_scale, w_scale] to [w_scale, h_scale, w_scale, h_scale]
-            im_scale = paddle.concat([scale_factor[::-1], scale_factor[::-1]])
-            mlvl_bboxes /= im_scale
-        mlvl_scores = paddle.concat(mlvl_scores)
-        mlvl_scores = mlvl_scores.transpose([1, 0])
-        return mlvl_bboxes, mlvl_scores
-
-    def decode(self, cls_scores, bbox_preds, im_shape, scale_factor,
-               cell_offset):
-        batch_bboxes = []
-        batch_scores = []
-        batch_size = cls_scores[0].shape[0]
-        for img_id in range(batch_size):
-            num_levels = len(cls_scores)
-            cls_score_list = [cls_scores[i][img_id] for i in range(num_levels)]
-            bbox_pred_list = [
-                bbox_preds[i].reshape([batch_size, -1, 4])[img_id]
-                for i in range(num_levels)
-            ]
-            bboxes, scores = self.get_bboxes_single(
-                cls_score_list,
-                bbox_pred_list,
-                im_shape[img_id],
-                scale_factor[img_id],
-                cell_offset=cell_offset)
-            batch_bboxes.append(bboxes)
-            batch_scores.append(scores)
-        batch_bboxes = paddle.stack(batch_bboxes, axis=0)
-        batch_scores = paddle.stack(batch_scores, axis=0)
-
-        return batch_bboxes, batch_scores
-
-    def post_process(self, gfl_head_outs, im_shape, scale_factor):
-        cls_scores, bboxes_reg = gfl_head_outs
-        bboxes, score = self.decode(cls_scores, bboxes_reg, im_shape,
-                                    scale_factor, self.cell_offset)
-        bbox_pred, bbox_num, _ = self.nms(bboxes, score)
-        return bbox_pred, bbox_num

+ 0 - 2
paddlex/ppdet/modeling/losses/__init__.py

@@ -22,7 +22,6 @@ from . import ctfocal_loss
 from . import keypoint_loss
 from . import keypoint_loss
 from . import jde_loss
 from . import jde_loss
 from . import fairmot_loss
 from . import fairmot_loss
-from . import gfocal_loss
 from . import detr_loss
 from . import detr_loss
 from . import sparsercnn_loss
 from . import sparsercnn_loss
 
 
@@ -36,6 +35,5 @@ from .ctfocal_loss import *
 from .keypoint_loss import *
 from .keypoint_loss import *
 from .jde_loss import *
 from .jde_loss import *
 from .fairmot_loss import *
 from .fairmot_loss import *
-from .gfocal_loss import *
 from .detr_loss import *
 from .detr_loss import *
 from .sparsercnn_loss import *
 from .sparsercnn_loss import *

+ 0 - 214
paddlex/ppdet/modeling/losses/gfocal_loss.py

@@ -1,214 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddlex.ppdet.core.workspace import register, serializable
-from paddlex.ppdet.modeling import ops
-
-__all__ = ['QualityFocalLoss', 'DistributionFocalLoss']
-
-
-def quality_focal_loss(pred, target, beta=2.0, use_sigmoid=True):
-    """
-    Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning
-    Qualified and Distributed Bounding Boxes for Dense Object Detection
-    <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        pred (Tensor): Predicted joint representation of classification
-            and quality (IoU) estimation with shape (N, C), C is the number of
-            classes.
-        target (tuple([Tensor])): Target category label with shape (N,)
-            and target quality label with shape (N,).
-        beta (float): The beta parameter for calculating the modulating factor.
-            Defaults to 2.0.
-    Returns:
-        Tensor: Loss tensor with shape (N,).
-    """
-    assert len(target) == 2, """target for QFL must be a tuple of two elements,
-        including category label and quality label, respectively"""
-    # label denotes the category id, score denotes the quality score
-    label, score = target
-    if use_sigmoid:
-        func = F.binary_cross_entropy_with_logits
-    else:
-        func = F.binary_cross_entropy
-
-    # negatives are supervised by 0 quality score
-    pred_sigmoid = F.sigmoid(pred) if use_sigmoid else pred
-    scale_factor = pred_sigmoid
-    zerolabel = paddle.zeros(pred.shape, dtype='float32')
-    loss = func(pred, zerolabel, reduction='none') * scale_factor.pow(beta)
-
-    # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
-    bg_class_ind = pred.shape[1]
-    pos = paddle.logical_and((label >= 0),
-                             (label < bg_class_ind)).nonzero().squeeze(1)
-    if pos.shape[0] == 0:
-        return loss.sum(axis=1)
-    pos_label = paddle.gather(label, pos, axis=0)
-    pos_mask = np.zeros(pred.shape, dtype=np.int32)
-    pos_mask[pos.numpy(), pos_label.numpy()] = 1
-    pos_mask = paddle.to_tensor(pos_mask, dtype='bool')
-    score = score.unsqueeze(-1).expand([-1, pred.shape[1]]).cast('float32')
-    # positives are supervised by bbox quality (IoU) score
-    scale_factor_new = score - pred_sigmoid
-
-    loss_pos = func(
-        pred, score, reduction='none') * scale_factor_new.abs().pow(beta)
-    loss = loss * paddle.logical_not(pos_mask) + loss_pos * pos_mask
-    loss = loss.sum(axis=1)
-    return loss
-
-
-def distribution_focal_loss(pred, label):
-    """Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning
-    Qualified and Distributed Bounding Boxes for Dense Object Detection
-    <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        pred (Tensor): Predicted general distribution of bounding boxes
-            (before softmax) with shape (N, n+1), n is the max value of the
-            integral set `{0, ..., n}` in paper.
-        label (Tensor): Target distance label for bounding boxes with
-            shape (N,).
-    Returns:
-        Tensor: Loss tensor with shape (N,).
-    """
-    dis_left = label.cast('int64')
-    dis_right = dis_left + 1
-    weight_left = dis_right.cast('float32') - label
-    weight_right = label - dis_left.cast('float32')
-    loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \
-        + F.cross_entropy(pred, dis_right, reduction='none') * weight_right
-    return loss
-
-
-@register
-@serializable
-class QualityFocalLoss(nn.Layer):
-    r"""Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:
-    Learning Qualified and Distributed Bounding Boxes for Dense Object
-    Detection <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.
-            Defaults to True.
-        beta (float): The beta parameter for calculating the modulating factor.
-            Defaults to 2.0.
-        reduction (str): Options are "none", "mean" and "sum".
-        loss_weight (float): Loss weight of current loss.
-    """
-
-    def __init__(self,
-                 use_sigmoid=True,
-                 beta=2.0,
-                 reduction='mean',
-                 loss_weight=1.0):
-        super(QualityFocalLoss, self).__init__()
-        self.use_sigmoid = use_sigmoid
-        self.beta = beta
-        assert reduction in ('none', 'mean', 'sum')
-        self.reduction = reduction
-        self.loss_weight = loss_weight
-
-    def forward(self, pred, target, weight=None, avg_factor=None):
-        """Forward function.
-        Args:
-            pred (Tensor): Predicted joint representation of
-                classification and quality (IoU) estimation with shape (N, C),
-                C is the number of classes.
-            target (tuple([Tensor])): Target category label with shape
-                (N,) and target quality label with shape (N,).
-            weight (Tensor, optional): The weight of loss for each
-                prediction. Defaults to None.
-            avg_factor (int, optional): Average factor that is used to average
-                the loss. Defaults to None.
-        """
-
-        loss = self.loss_weight * quality_focal_loss(
-            pred, target, beta=self.beta, use_sigmoid=self.use_sigmoid)
-
-        if weight is not None:
-            loss = loss * weight
-        if avg_factor is None:
-            if self.reduction == 'none':
-                return loss
-            elif self.reduction == 'mean':
-                return loss.mean()
-            elif self.reduction == 'sum':
-                return loss.sum()
-        else:
-            # if reduction is mean, then average the loss by avg_factor
-            if self.reduction == 'mean':
-                loss = loss.sum() / avg_factor
-            # if reduction is 'none', then do nothing, otherwise raise an error
-            elif self.reduction != 'none':
-                raise ValueError(
-                    'avg_factor can not be used with reduction="sum"')
-        return loss
-
-
-@register
-@serializable
-class DistributionFocalLoss(nn.Layer):
-    """Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:
-    Learning Qualified and Distributed Bounding Boxes for Dense Object
-    Detection <https://arxiv.org/abs/2006.04388>`_.
-    Args:
-        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.
-        loss_weight (float): Loss weight of current loss.
-    """
-
-    def __init__(self, reduction='mean', loss_weight=1.0):
-        super(DistributionFocalLoss, self).__init__()
-        assert reduction in ('none', 'mean', 'sum')
-        self.reduction = reduction
-        self.loss_weight = loss_weight
-
-    def forward(self, pred, target, weight=None, avg_factor=None):
-        """Forward function.
-        Args:
-            pred (Tensor): Predicted general distribution of bounding
-                boxes (before softmax) with shape (N, n+1), n is the max value
-                of the integral set `{0, ..., n}` in paper.
-            target (Tensor): Target distance label for bounding boxes
-                with shape (N,).
-            weight (Tensor, optional): The weight of loss for each
-                prediction. Defaults to None.
-            avg_factor (int, optional): Average factor that is used to average
-                the loss. Defaults to None.
-        """
-        loss = self.loss_weight * distribution_focal_loss(pred, target)
-        if weight is not None:
-            loss = loss * weight
-        if avg_factor is None:
-            if self.reduction == 'none':
-                return loss
-            elif self.reduction == 'mean':
-                return loss.mean()
-            elif self.reduction == 'sum':
-                return loss.sum()
-        else:
-            # if reduction is mean, then average the loss by avg_factor
-            if self.reduction == 'mean':
-                loss = loss.sum() / avg_factor
-            # if reduction is 'none', then do nothing, otherwise raise an error
-            elif self.reduction != 'none':
-                raise ValueError(
-                    'avg_factor can not be used with reduction="sum"')
-        return loss

+ 4 - 5
paddlex/ppdet/modeling/losses/keypoint_loss.py

@@ -29,7 +29,7 @@ __all__ = ['HrHRNetLoss', 'KeyPointMSELoss']
 @register
 @register
 @serializable
 @serializable
 class KeyPointMSELoss(nn.Layer):
 class KeyPointMSELoss(nn.Layer):
-    def __init__(self, use_target_weight=True, loss_scale=0.5):
+    def __init__(self, use_target_weight=True):
         """
         """
         KeyPointMSELoss layer
         KeyPointMSELoss layer
 
 
@@ -39,7 +39,6 @@ class KeyPointMSELoss(nn.Layer):
         super(KeyPointMSELoss, self).__init__()
         super(KeyPointMSELoss, self).__init__()
         self.criterion = nn.MSELoss(reduction='mean')
         self.criterion = nn.MSELoss(reduction='mean')
         self.use_target_weight = use_target_weight
         self.use_target_weight = use_target_weight
-        self.loss_scale = loss_scale
 
 
     def forward(self, output, records):
     def forward(self, output, records):
         target = records['target']
         target = records['target']
@@ -51,16 +50,16 @@ class KeyPointMSELoss(nn.Layer):
         heatmaps_gt = target.reshape(
         heatmaps_gt = target.reshape(
             (batch_size, num_joints, -1)).split(num_joints, 1)
             (batch_size, num_joints, -1)).split(num_joints, 1)
         loss = 0
         loss = 0
+
         for idx in range(num_joints):
         for idx in range(num_joints):
             heatmap_pred = heatmaps_pred[idx].squeeze()
             heatmap_pred = heatmaps_pred[idx].squeeze()
             heatmap_gt = heatmaps_gt[idx].squeeze()
             heatmap_gt = heatmaps_gt[idx].squeeze()
             if self.use_target_weight:
             if self.use_target_weight:
-                loss += self.loss_scale * self.criterion(
+                loss += 0.5 * self.criterion(
                     heatmap_pred.multiply(target_weight[:, idx]),
                     heatmap_pred.multiply(target_weight[:, idx]),
                     heatmap_gt.multiply(target_weight[:, idx]))
                     heatmap_gt.multiply(target_weight[:, idx]))
             else:
             else:
-                loss += self.loss_scale * self.criterion(heatmap_pred,
-                                                         heatmap_gt)
+                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
         keypoint_losses = dict()
         keypoint_losses = dict()
         keypoint_losses['loss'] = loss / num_joints
         keypoint_losses['loss'] = loss / num_joints
         return keypoint_losses
         return keypoint_losses

+ 0 - 2
paddlex/ppdet/modeling/necks/__init__.py

@@ -17,7 +17,6 @@ from . import yolo_fpn
 from . import hrfpn
 from . import hrfpn
 from . import ttf_fpn
 from . import ttf_fpn
 from . import centernet_fpn
 from . import centernet_fpn
-from . import pan
 
 
 from .fpn import *
 from .fpn import *
 from .yolo_fpn import *
 from .yolo_fpn import *
@@ -25,4 +24,3 @@ from .hrfpn import *
 from .ttf_fpn import *
 from .ttf_fpn import *
 from .centernet_fpn import *
 from .centernet_fpn import *
 from .blazeface_fpn import *
 from .blazeface_fpn import *
-from .pan import *

+ 14 - 3
paddlex/ppdet/modeling/necks/blazeface_fpn.py

@@ -51,14 +51,25 @@ class ConvBNLayer(nn.Layer):
             padding=padding,
             padding=padding,
             groups=num_groups,
             groups=num_groups,
             weight_attr=ParamAttr(
             weight_attr=ParamAttr(
-                learning_rate=conv_lr, initializer=KaimingNormal()),
+                learning_rate=conv_lr,
+                initializer=KaimingNormal(),
+                name=name + "_weights"),
             bias_attr=False)
             bias_attr=False)
 
 
+        param_attr = ParamAttr(name=name + "_bn_scale")
+        bias_attr = ParamAttr(name=name + "_bn_offset")
         if norm_type == 'sync_bn':
         if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
+            self._batch_norm = nn.SyncBatchNorm(
+                out_channels, weight_attr=param_attr, bias_attr=bias_attr)
         else:
         else:
             self._batch_norm = nn.BatchNorm(
             self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+                out_channels,
+                act=None,
+                param_attr=param_attr,
+                bias_attr=bias_attr,
+                use_global_stats=False,
+                moving_mean_name=name + '_bn_mean',
+                moving_variance_name=name + '_bn_variance')
 
 
     def forward(self, x):
     def forward(self, x):
         x = self._conv(x)
         x = self._conv(x)

+ 4 - 0
paddlex/ppdet/modeling/necks/hrfpn.py

@@ -14,6 +14,7 @@
 
 
 import paddle
 import paddle
 import paddle.nn.functional as F
 import paddle.nn.functional as F
+from paddle import ParamAttr
 import paddle.nn as nn
 import paddle.nn as nn
 from paddlex.ppdet.core.workspace import register
 from paddlex.ppdet.core.workspace import register
 from ..shape_spec import ShapeSpec
 from ..shape_spec import ShapeSpec
@@ -52,6 +53,7 @@ class HRFPN(nn.Layer):
             in_channels=in_channel,
             in_channels=in_channel,
             out_channels=out_channel,
             out_channels=out_channel,
             kernel_size=1,
             kernel_size=1,
+            weight_attr=ParamAttr(name='hrfpn_reduction_weights'),
             bias_attr=False)
             bias_attr=False)
 
 
         if share_conv:
         if share_conv:
@@ -60,6 +62,7 @@ class HRFPN(nn.Layer):
                 out_channels=out_channel,
                 out_channels=out_channel,
                 kernel_size=3,
                 kernel_size=3,
                 padding=1,
                 padding=1,
+                weight_attr=ParamAttr(name='fpn_conv_weights'),
                 bias_attr=False)
                 bias_attr=False)
         else:
         else:
             self.fpn_conv = []
             self.fpn_conv = []
@@ -72,6 +75,7 @@ class HRFPN(nn.Layer):
                         out_channels=out_channel,
                         out_channels=out_channel,
                         kernel_size=3,
                         kernel_size=3,
                         padding=1,
                         padding=1,
+                        weight_attr=ParamAttr(name=conv_name + "_weights"),
                         bias_attr=False))
                         bias_attr=False))
                 self.fpn_conv.append(conv)
                 self.fpn_conv.append(conv)
 
 

+ 0 - 135
paddlex/ppdet/modeling/necks/pan.py

@@ -1,135 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import paddle
-import paddle.nn as nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import XavierUniform
-from paddle.regularizer import L2Decay
-from paddlex.ppdet.core.workspace import register, serializable
-from paddlex.ppdet.modeling.layers import ConvNormLayer
-from ..shape_spec import ShapeSpec
-
-__all__ = ['PAN']
-
-
-@register
-@serializable
-class PAN(nn.Layer):
-    """
-    Path Aggregation Network, see https://arxiv.org/abs/1803.01534
-
-    Args:
-        in_channels (list[int]): input channels of each level which can be
-            derived from the output shape of backbone by from_config
-        out_channel (list[int]): output channel of each level
-        spatial_scales (list[float]): the spatial scales between input feature
-            maps and original input image which can be derived from the output
-            shape of backbone by from_config
-        has_extra_convs (bool): whether to add extra conv to the last level.
-            default False
-        extra_stage (int): the number of extra stages added to the last level.
-            default 1
-        use_c5 (bool): Whether to use c5 as the input of extra stage,
-            otherwise p5 is used. default True
-        norm_type (string|None): The normalization type in FPN module. If
-            norm_type is None, norm will not be used after conv and if
-            norm_type is string, bn, gn, sync_bn are available. default None
-        norm_decay (float): weight decay for normalization layer weights.
-            default 0.
-        freeze_norm (bool): whether to freeze normalization layer.
-            default False
-        relu_before_extra_convs (bool): whether to add relu before extra convs.
-            default False
-    """
-
-    def __init__(self,
-                 in_channels,
-                 out_channel,
-                 spatial_scales=[0.125, 0.0625, 0.03125],
-                 start_level=0,
-                 end_level=-1,
-                 norm_type=None):
-        super(PAN, self).__init__()
-        self.out_channel = out_channel
-        self.num_ins = len(in_channels)
-        self.spatial_scales = spatial_scales
-        if end_level == -1:
-            self.end_level = self.num_ins
-        else:
-            # if end_level < inputs, no extra level is allowed
-            self.end_level = end_level
-            assert end_level <= len(in_channels)
-        self.start_level = start_level
-        self.norm_type = norm_type
-        self.lateral_convs = []
-
-        for i in range(self.start_level, self.end_level):
-            in_c = in_channels[i - self.start_level]
-            if self.norm_type is not None:
-                lateral = self.add_sublayer(
-                    'pan_lateral' + str(i),
-                    ConvNormLayer(
-                        ch_in=in_c,
-                        ch_out=self.out_channel,
-                        filter_size=1,
-                        stride=1,
-                        norm_type=self.norm_type,
-                        norm_decay=self.norm_decay,
-                        freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=in_c)))
-            else:
-                lateral = self.add_sublayer(
-                    'pan_lateral' + str(i),
-                    nn.Conv2D(
-                        in_channels=in_c,
-                        out_channels=self.out_channel,
-                        kernel_size=1,
-                        weight_attr=ParamAttr(
-                            initializer=XavierUniform(fan_out=in_c))))
-            self.lateral_convs.append(lateral)
-
-    @classmethod
-    def from_config(cls, cfg, input_shape):
-        return {'in_channels': [i.channels for i in input_shape], }
-
-    def forward(self, body_feats):
-        laterals = []
-        for i, lateral_conv in enumerate(self.lateral_convs):
-            laterals.append(lateral_conv(body_feats[i + self.start_level]))
-        num_levels = len(laterals)
-        for i in range(1, num_levels):
-            lvl = num_levels - i
-            upsample = F.interpolate(
-                laterals[lvl],
-                scale_factor=2.,
-                mode='bilinear', )
-            laterals[lvl - 1] += upsample
-
-        outs = [laterals[i] for i in range(num_levels)]
-        for i in range(0, num_levels - 1):
-            outs[i + 1] += F.interpolate(
-                outs[i], scale_factor=0.5, mode='bilinear')
-
-        return outs
-
-    @property
-    def out_shape(self):
-        return [
-            ShapeSpec(
-                channels=self.out_channel, stride=1. / s)
-            for s in self.spatial_scales
-        ]

+ 0 - 12
paddlex/ppdet/modeling/ops.py

@@ -1592,15 +1592,3 @@ def smooth_l1(input,
     out = paddle.reshape(out, shape=[out.shape[0], -1])
     out = paddle.reshape(out, shape=[out.shape[0], -1])
     out = paddle.sum(out, axis=1)
     out = paddle.sum(out, axis=1)
     return out
     return out
-
-
-def channel_shuffle(x, groups):
-    batch_size, num_channels, height, width = x.shape[0:4]
-    assert (num_channels % groups == 0,
-            'num_channels should be divisible by groups')
-    channels_per_group = num_channels // groups
-    x = paddle.reshape(
-        x=x, shape=[batch_size, groups, channels_per_group, height, width])
-    x = paddle.transpose(x=x, perm=[0, 2, 1, 3, 4])
-    x = paddle.reshape(x=x, shape=[batch_size, num_channels, height, width])
-    return x

+ 3 - 1
paddlex/ppdet/modeling/reid/jde_embedding_head.py

@@ -92,7 +92,9 @@ class JDEEmbeddingHead(nn.Layer):
                     kernel_size=3,
                     kernel_size=3,
                     stride=1,
                     stride=1,
                     padding=1,
                     padding=1,
-                    bias_attr=ParamAttr(regularizer=L2Decay(0.))))
+                    weight_attr=ParamAttr(name=name + '.conv.weights'),
+                    bias_attr=ParamAttr(
+                        name=name + '.conv.bias', regularizer=L2Decay(0.))))
             self.identify_outputs.append(identify_output)
             self.identify_outputs.append(identify_output)
 
 
             loss_p_cls = self.add_sublayer('cls.{}'.format(i),
             loss_p_cls = self.add_sublayer('cls.{}'.format(i),

+ 7 - 3
paddlex/ppdet/modeling/reid/pyramidal_embedding.py

@@ -89,12 +89,16 @@ class PCBPyramid(nn.Layer):
             if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
             if idx_branches >= sum(self.num_in_each_level[0:idx_levels + 1]):
                 idx_levels += 1
                 idx_levels += 1
 
 
+            name = "Linear_branch_id_{}".format(idx_branches)
             fc = nn.Linear(
             fc = nn.Linear(
                 in_features=num_conv_out_channels,
                 in_features=num_conv_out_channels,
                 out_features=self.num_classes,
                 out_features=self.num_classes,
-                weight_attr=ParamAttr(initializer=Normal(
-                    mean=0., std=0.001)),
-                bias_attr=ParamAttr(initializer=Constant(value=0.)))
+                weight_attr=ParamAttr(
+                    name=name + "_weights",
+                    initializer=Normal(
+                        mean=0., std=0.001)),
+                bias_attr=ParamAttr(
+                    name=name + "_bias", initializer=Constant(value=0.)))
             pyramid_fc_list.append(fc)
             pyramid_fc_list.append(fc)
         return pyramid_conv_list, pyramid_fc_list
         return pyramid_conv_list, pyramid_fc_list
 
 

+ 12 - 2
paddlex/ppdet/modeling/reid/resnet.py

@@ -50,13 +50,23 @@ class ConvBNLayer(nn.Layer):
             dilation=dilation,
             dilation=dilation,
             groups=groups,
             groups=groups,
             weight_attr=ParamAttr(
             weight_attr=ParamAttr(
+                name=name + "_weights",
                 learning_rate=lr_mult,
                 learning_rate=lr_mult,
                 initializer=Normal(0, math.sqrt(2. / conv_stdv))),
                 initializer=Normal(0, math.sqrt(2. / conv_stdv))),
             bias_attr=False,
             bias_attr=False,
             data_format=data_format)
             data_format=data_format)
-
+        if name == "conv1":
+            bn_name = "bn_" + name
+        else:
+            bn_name = "bn" + name[3:]
         self._batch_norm = nn.BatchNorm(
         self._batch_norm = nn.BatchNorm(
-            num_filters, act=act, data_layout=data_format)
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(bn_name + "_offset"),
+            moving_mean_name=bn_name + "_mean",
+            moving_variance_name=bn_name + "_variance",
+            data_layout=data_format)
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._conv(inputs)

+ 0 - 10
paddlex/ppdet/modeling/tests/test_architectures.py

@@ -55,15 +55,5 @@ class TestSSD(TestFasterRCNN):
         self.cfg_file = 'configs/ssd/ssd_vgg16_300_240e_voc.yml'
         self.cfg_file = 'configs/ssd/ssd_vgg16_300_240e_voc.yml'
 
 
 
 
-class TestGFL(TestFasterRCNN):
-    def set_config(self):
-        self.cfg_file = 'configs/gfl/gfl_r50_fpn_1x_coco.yml'
-
-
-class TestPicoDet(TestFasterRCNN):
-    def set_config(self):
-        self.cfg_file = 'configs/picodet/picodet_s_shufflenetv2_320_coco.yml'
-
-
 if __name__ == '__main__':
 if __name__ == '__main__':
     unittest.main()
     unittest.main()

+ 2 - 5
paddlex/ppdet/utils/download.py

@@ -99,10 +99,7 @@ DATASETS = {
         'https://paddledet.bj.bcebos.com/data/spine_coco.tar',
         'https://paddledet.bj.bcebos.com/data/spine_coco.tar',
         '7ed69ae73f842cd2a8cf4f58dc3c5535', ), ], ['annotations', 'images']),
         '7ed69ae73f842cd2a8cf4f58dc3c5535', ), ], ['annotations', 'images']),
     'mot': (),
     'mot': (),
-    'objects365': (),
-    'coco_ce': ([(
-        'https://paddledet.bj.bcebos.com/data/coco_ce.tar',
-        'eadd1b79bc2f069f2744b1dd4e0c0329', ), ], [])
+    'objects365': ()
 }
 }
 
 
 DOWNLOAD_RETRY_LIMIT = 3
 DOWNLOAD_RETRY_LIMIT = 3
@@ -140,7 +137,7 @@ def get_config_path(url):
 
 
     # 2. get url
     # 2. get url
     try:
     try:
-        from ppdet import __version__ as version
+        from paddlex.ppdet import __version__ as version
     except ImportError:
     except ImportError:
         version = None
         version = None
 
 

+ 9 - 0
paddlex_restful/restful/app.py

@@ -207,6 +207,9 @@ def get_image_file():
     data = request.get_json()
     data = request.get_json()
     if request.method == 'GET':
     if request.method == 'GET':
         ret = data['path']
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
         return send_file(ret)
 
 
 
 
@@ -584,6 +587,9 @@ def task_evaluate_file():
     if request.method == 'GET':
     if request.method == 'GET':
         if 'path' in data:
         if 'path' in data:
             ret = data['path']
             ret = data['path']
+            assert os.path.abspath(ret).startswith(
+                os.path.abspath(SD.workspace_dir)
+            ) and ".." not in ret, "Illegal path {}.".format(ret)
             return send_file(ret)
             return send_file(ret)
         else:
         else:
             from .project.task import get_evaluate_result
             from .project.task import get_evaluate_result
@@ -920,6 +926,9 @@ def model_file():
     data = request.get_json()
     data = request.get_json()
     if request.method == 'GET':
     if request.method == 'GET':
         ret = data['path']
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
         return send_file(ret)
 
 
 
 

+ 9 - 0
static/paddlex_restful/restful/app.py

@@ -206,6 +206,9 @@ def get_image_file():
     data = request.get_json()
     data = request.get_json()
     if request.method == 'GET':
     if request.method == 'GET':
         ret = data['path']
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
         return send_file(ret)
 
 
 
 
@@ -583,6 +586,9 @@ def task_evaluate_file():
     if request.method == 'GET':
     if request.method == 'GET':
         if 'path' in data:
         if 'path' in data:
             ret = data['path']
             ret = data['path']
+            assert os.path.abspath(ret).startswith(
+                os.path.abspath(SD.workspace_dir)
+            ) and ".." not in ret, "Illegal path {}.".format(ret)
             return send_file(ret)
             return send_file(ret)
         else:
         else:
             from .project.task import get_evaluate_result
             from .project.task import get_evaluate_result
@@ -919,6 +925,9 @@ def model_file():
     data = request.get_json()
     data = request.get_json()
     if request.method == 'GET':
     if request.method == 'GET':
         ret = data['path']
         ret = data['path']
+        assert os.path.abspath(ret).startswith(
+            os.path.abspath(SD.workspace_dir)
+        ) and ".." not in ret, "Illegal path {}.".format(ret)
         return send_file(ret)
         return send_file(ret)