Эх сурвалжийг харах

Merge pull request #1321 from will-jl944/develop_jf

Training RCNN model w/ negative samples supports bs>1
FlyingQianMM 3 жил өмнө
parent
commit
25d48a59e6

+ 1 - 1
PaddleDetection

@@ -1 +1 @@
-Subproject commit 60674617b7bfff187e6eefd432e826c8d801fdad
+Subproject commit 692d732994660ceba82c75034c802eb1138239cf

+ 14 - 54
paddlex/cv/models/detector.py

@@ -1352,42 +1352,22 @@ class FasterRCNN(BaseDetector):
         """
         """
         if train_dataset.pos_num < len(train_dataset.file_list):
         if train_dataset.pos_num < len(train_dataset.file_list):
             train_dataset.num_workers = 0
             train_dataset.num_workers = 0
-            if train_batch_size != 1:
-                train_batch_size = 1
-                logging.warning(
-                    "Training RCNN models with negative samples only support batch size equals to 1 "
-                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
-                )
-            nranks = paddle.distributed.get_world_size()
-            local_rank = paddle.distributed.get_rank()
-            # single card training
-            if nranks < 2 or local_rank == 0:
-                super(FasterRCNN, self).train(
-                    num_epochs, train_dataset, train_batch_size, eval_dataset,
-                    optimizer, save_interval_epochs, log_interval_steps,
-                    save_dir, pretrain_weights, learning_rate, warmup_steps,
-                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
-                    use_ema, early_stop, early_stop_patience, use_vdl,
-                    resume_checkpoint)
-        else:
-            super(FasterRCNN, self).train(
-                num_epochs, train_dataset, train_batch_size, eval_dataset,
-                optimizer, save_interval_epochs, log_interval_steps, save_dir,
-                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
-                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
-                early_stop_patience, use_vdl, resume_checkpoint)
+        super(FasterRCNN, self).train(
+            num_epochs, train_dataset, train_batch_size, eval_dataset,
+            optimizer, save_interval_epochs, log_interval_steps, save_dir,
+            pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+            lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+            early_stop_patience, use_vdl, resume_checkpoint)
 
 
     def _compose_batch_transform(self, transforms, mode='train'):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
         if mode == 'train':
             default_batch_transforms = [
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
             ]
-            collate_batch = False
         else:
         else:
             default_batch_transforms = [
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
             ]
-            collate_batch = True
         custom_batch_transforms = []
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
         for i, op in enumerate(transforms.transforms):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
@@ -1400,7 +1380,7 @@ class FasterRCNN(BaseDetector):
 
 
         batch_transforms = BatchCompose(
         batch_transforms = BatchCompose(
             custom_batch_transforms + default_batch_transforms,
             custom_batch_transforms + default_batch_transforms,
-            collate_batch=collate_batch)
+            collate_batch=False)
 
 
         return batch_transforms
         return batch_transforms
 
 
@@ -2200,42 +2180,22 @@ class MaskRCNN(BaseDetector):
         """
         """
         if train_dataset.pos_num < len(train_dataset.file_list):
         if train_dataset.pos_num < len(train_dataset.file_list):
             train_dataset.num_workers = 0
             train_dataset.num_workers = 0
-            if train_batch_size != 1:
-                train_batch_size = 1
-                logging.warning(
-                    "Training RCNN models with negative samples only support batch size equals to 1 "
-                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
-                )
-            nranks = paddle.distributed.get_world_size()
-            local_rank = paddle.distributed.get_rank()
-            # single card training
-            if nranks < 2 or local_rank == 0:
-                super(MaskRCNN, self).train(
-                    num_epochs, train_dataset, train_batch_size, eval_dataset,
-                    optimizer, save_interval_epochs, log_interval_steps,
-                    save_dir, pretrain_weights, learning_rate, warmup_steps,
-                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
-                    use_ema, early_stop, early_stop_patience, use_vdl,
-                    resume_checkpoint)
-        else:
-            super(MaskRCNN, self).train(
-                num_epochs, train_dataset, train_batch_size, eval_dataset,
-                optimizer, save_interval_epochs, log_interval_steps, save_dir,
-                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
-                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
-                early_stop_patience, use_vdl, resume_checkpoint)
+        super(MaskRCNN, self).train(
+            num_epochs, train_dataset, train_batch_size, eval_dataset,
+            optimizer, save_interval_epochs, log_interval_steps, save_dir,
+            pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+            lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+            early_stop_patience, use_vdl, resume_checkpoint)
 
 
     def _compose_batch_transform(self, transforms, mode='train'):
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
         if mode == 'train':
             default_batch_transforms = [
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
             ]
-            collate_batch = False
         else:
         else:
             default_batch_transforms = [
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
             ]
-            collate_batch = True
         custom_batch_transforms = []
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
         for i, op in enumerate(transforms.transforms):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
@@ -2248,7 +2208,7 @@ class MaskRCNN(BaseDetector):
 
 
         batch_transforms = BatchCompose(
         batch_transforms = BatchCompose(
             custom_batch_transforms + default_batch_transforms,
             custom_batch_transforms + default_batch_transforms,
-            collate_batch=collate_batch)
+            collate_batch=False)
 
 
         return batch_transforms
         return batch_transforms
 
 

+ 24 - 5
paddlex/ppdet/engine/trainer.py

@@ -33,6 +33,7 @@ from paddle.static import InputSpec
 from paddlex.ppdet.optimizer import ModelEMA
 from paddlex.ppdet.optimizer import ModelEMA
 
 
 from paddlex.ppdet.core.workspace import create
 from paddlex.ppdet.core.workspace import create
+from paddlex.ppdet.modeling.architectures.meta_arch import BaseArch
 from paddlex.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlex.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlex.ppdet.utils.visualizer import visualize_results, save_result
 from paddlex.ppdet.utils.visualizer import visualize_results, save_result
 from paddlex.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
 from paddlex.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
@@ -111,8 +112,12 @@ class Trainer(object):
         if self.mode == 'eval':
         if self.mode == 'eval':
             self._eval_batch_sampler = paddle.io.BatchSampler(
             self._eval_batch_sampler = paddle.io.BatchSampler(
                 self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
                 self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
-            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
-                self.dataset, cfg.worker_num, self._eval_batch_sampler)
+            reader_name = '{}Reader'.format(self.mode.capitalize())
+            # If metric is VOC, need to be set collate_batch=False.
+            if cfg.metric == 'VOC':
+                cfg[reader_name]['collate_batch'] = False
+            self.loader = create(reader_name)(self.dataset, cfg.worker_num,
+                                              self._eval_batch_sampler)
         # TestDataset build after user set images, skip loader creation here
         # TestDataset build after user set images, skip loader creation here
 
 
         # build optimizer in train mode
         # build optimizer in train mode
@@ -336,6 +341,12 @@ class Trainer(object):
         assert self.mode == 'train', "Model not in 'train' mode"
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False
         Init_mark = False
 
 
+        sync_bn = (
+            getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
+            self.cfg.use_gpu and self._nranks > 1)
+        if sync_bn:
+            self.model = BaseArch.convert_sync_batchnorm(self.model)
+
         model = self.model
         model = self.model
         if self.cfg.get('fleet', False):
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)
             model = fleet.distributed_model(model)
@@ -364,7 +375,9 @@ class Trainer(object):
         self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
         self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
 
 
         if self.cfg.get('print_flops', False):
         if self.cfg.get('print_flops', False):
-            self._flops(self.loader)
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num)
+            self._flops(flops_loader)
         profiler_options = self.cfg.get('profiler_options', None)
         profiler_options = self.cfg.get('profiler_options', None)
 
 
         self._compose_callback.on_train_begin(self.status)
         self._compose_callback.on_train_begin(self.status)
@@ -436,6 +449,9 @@ class Trainer(object):
                         paddle.io.BatchSampler(
                         paddle.io.BatchSampler(
                             self._eval_dataset,
                             self._eval_dataset,
                             batch_size=self.cfg.EvalReader['batch_size'])
                             batch_size=self.cfg.EvalReader['batch_size'])
+                    # If metric is VOC, need to be set collate_batch=False.
+                    if self.cfg.metric == 'VOC':
+                        self.cfg['EvalReader']['collate_batch'] = False
                     self._eval_loader = create('EvalReader')(
                     self._eval_loader = create('EvalReader')(
                         self._eval_dataset,
                         self._eval_dataset,
                         self.cfg.worker_num,
                         self.cfg.worker_num,
@@ -463,7 +479,9 @@ class Trainer(object):
         self.status['mode'] = 'eval'
         self.status['mode'] = 'eval'
         self.model.eval()
         self.model.eval()
         if self.cfg.get('print_flops', False):
         if self.cfg.get('print_flops', False):
-            self._flops(loader)
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
+            self._flops(flops_loader)
         for step_id, data in enumerate(loader):
         for step_id, data in enumerate(loader):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id
             self._compose_callback.on_step_begin(self.status)
             self._compose_callback.on_step_begin(self.status)
@@ -514,7 +532,8 @@ class Trainer(object):
         self.status['mode'] = 'test'
         self.status['mode'] = 'test'
         self.model.eval()
         self.model.eval()
         if self.cfg.get('print_flops', False):
         if self.cfg.get('print_flops', False):
-            self._flops(loader)
+            flops_loader = create('TestReader')(self.dataset, 0)
+            self._flops(flops_loader)
         results = []
         results = []
         for step_id, data in enumerate(loader):
         for step_id, data in enumerate(loader):
             self.status['step_id'] = step_id
             self.status['step_id'] = step_id

+ 13 - 0
paddlex/ppdet/modeling/architectures/meta_arch.py

@@ -126,3 +126,16 @@ class BaseArch(nn.Layer):
 
 
     def get_pred(self, ):
     def get_pred(self, ):
         raise NotImplementedError("Should implement get_pred method!")
         raise NotImplementedError("Should implement get_pred method!")
+
+    @classmethod
+    def convert_sync_batchnorm(cls, layer):
+        layer_output = layer
+        if getattr(layer, 'norm_type', None) == 'sync_bn':
+            layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
+        else:
+            for name, sublayer in layer.named_children():
+                layer_output.add_sublayer(name,
+                                          cls.convert_sync_batchnorm(sublayer))
+
+        del layer
+        return layer_output

+ 2 - 5
paddlex/ppdet/modeling/backbones/blazenet.py

@@ -58,11 +58,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)
             bias_attr=False)
 
 
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['bn', 'sync_bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self._conv(x)
         x = self._conv(x)

+ 1 - 1
paddlex/ppdet/modeling/backbones/esnet.py

@@ -20,7 +20,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
 from paddle.nn.initializer import KaimingNormal
 from paddle.nn.initializer import KaimingNormal
 from paddle.regularizer import L2Decay
 from paddle.regularizer import L2Decay
 
 

+ 3 - 3
paddlex/ppdet/modeling/backbones/hrnet.py

@@ -62,11 +62,11 @@ class ConvNormLayer(nn.Layer):
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
         bias_attr = ParamAttr(
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-        global_stats = True if freeze_norm else False
+        global_stats = True if freeze_norm else None
         if norm_type in ['bn', 'sync_bn']:
         if norm_type in ['bn', 'sync_bn']:
-            self.norm = nn.BatchNorm(
+            self.norm = nn.BatchNorm2D(
                 ch_out,
                 ch_out,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
                 use_global_stats=global_stats)
         elif norm_type == 'gn':
         elif norm_type == 'gn':

+ 3 - 3
paddlex/ppdet/modeling/backbones/lcnet.py

@@ -19,7 +19,7 @@ from __future__ import print_function
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 from paddle import ParamAttr
 from paddle import ParamAttr
-from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
+from paddle.nn import AdaptiveAvgPool2D, Conv2D
 from paddle.regularizer import L2Decay
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal
 from paddle.nn.initializer import KaimingNormal
 
 
@@ -81,9 +81,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
             bias_attr=False)
 
 
-        self.bn = BatchNorm(
+        self.bn = nn.BatchNorm2D(
             num_filters,
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()
         self.hardswish = nn.Hardswish()
 
 

+ 14 - 14
paddlex/ppdet/modeling/backbones/lite_hrnet.py

@@ -56,11 +56,11 @@ class ConvNormLayer(nn.Layer):
                 regularizer=L2Decay(norm_decay), )
                 regularizer=L2Decay(norm_decay), )
             bias_attr = ParamAttr(
             bias_attr = ParamAttr(
                 learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
                 learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-            global_stats = True if freeze_norm else False
+            global_stats = True if freeze_norm else None
             if norm_type in ['bn', 'sync_bn']:
             if norm_type in ['bn', 'sync_bn']:
-                self.norm = nn.BatchNorm(
+                self.norm = nn.BatchNorm2D(
                     ch_out,
                     ch_out,
-                    param_attr=param_attr,
+                    weight_attr=param_attr,
                     bias_attr=bias_attr,
                     bias_attr=bias_attr,
                     use_global_stats=global_stats, )
                     use_global_stats=global_stats, )
             elif norm_type == 'gn':
             elif norm_type == 'gn':
@@ -582,7 +582,7 @@ class LiteHRNetModule(nn.Layer):
                                 stride=1,
                                 stride=1,
                                 padding=0,
                                 padding=0,
                                 bias=False, ),
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[i]),
+                            nn.BatchNorm2D(self.in_channels[i]),
                             nn.Upsample(
                             nn.Upsample(
                                 scale_factor=2**(j - i), mode='nearest')))
                                 scale_factor=2**(j - i), mode='nearest')))
                 elif j == i:
                 elif j == i:
@@ -601,7 +601,7 @@ class LiteHRNetModule(nn.Layer):
                                         padding=1,
                                         padding=1,
                                         groups=self.in_channels[j],
                                         groups=self.in_channels[j],
                                         bias=False, ),
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     L.Conv2d(
                                     L.Conv2d(
                                         self.in_channels[j],
                                         self.in_channels[j],
                                         self.in_channels[i],
                                         self.in_channels[i],
@@ -609,7 +609,7 @@ class LiteHRNetModule(nn.Layer):
                                         stride=1,
                                         stride=1,
                                         padding=0,
                                         padding=0,
                                         bias=False, ),
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[i])))
+                                    nn.BatchNorm2D(self.in_channels[i])))
                         else:
                         else:
                             conv_downsamples.append(
                             conv_downsamples.append(
                                 nn.Sequential(
                                 nn.Sequential(
@@ -621,7 +621,7 @@ class LiteHRNetModule(nn.Layer):
                                         padding=1,
                                         padding=1,
                                         groups=self.in_channels[j],
                                         groups=self.in_channels[j],
                                         bias=False, ),
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     L.Conv2d(
                                     L.Conv2d(
                                         self.in_channels[j],
                                         self.in_channels[j],
                                         self.in_channels[j],
                                         self.in_channels[j],
@@ -629,7 +629,7 @@ class LiteHRNetModule(nn.Layer):
                                         stride=1,
                                         stride=1,
                                         padding=0,
                                         padding=0,
                                         bias=False, ),
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     nn.ReLU()))
                                     nn.ReLU()))
 
 
                     fuse_layer.append(nn.Sequential(*conv_downsamples))
                     fuse_layer.append(nn.Sequential(*conv_downsamples))
@@ -777,7 +777,7 @@ class LiteHRNet(nn.Layer):
                                 padding=1,
                                 padding=1,
                                 groups=num_channels_pre_layer[i],
                                 groups=num_channels_pre_layer[i],
                                 bias=False),
                                 bias=False),
-                            nn.BatchNorm(num_channels_pre_layer[i]),
+                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                             L.Conv2d(
                             L.Conv2d(
                                 num_channels_pre_layer[i],
                                 num_channels_pre_layer[i],
                                 num_channels_cur_layer[i],
                                 num_channels_cur_layer[i],
@@ -785,7 +785,7 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 stride=1,
                                 padding=0,
                                 padding=0,
                                 bias=False, ),
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                             nn.ReLU()))
                             nn.ReLU()))
                 else:
                 else:
                     transition_layers.append(None)
                     transition_layers.append(None)
@@ -802,7 +802,7 @@ class LiteHRNet(nn.Layer):
                                 stride=2,
                                 stride=2,
                                 padding=1,
                                 padding=1,
                                 bias=False, ),
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                             L.Conv2d(
                             L.Conv2d(
                                 num_channels_pre_layer[-1],
                                 num_channels_pre_layer[-1],
                                 num_channels_cur_layer[i]
                                 num_channels_cur_layer[i]
@@ -812,9 +812,9 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 stride=1,
                                 padding=0,
                                 padding=0,
                                 bias=False, ),
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]
-                                         if j == i - num_branches_pre else
-                                         num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]
+                                           if j == i - num_branches_pre else
+                                           num_channels_pre_layer[-1]),
                             nn.ReLU()))
                             nn.ReLU()))
                 transition_layers.append(nn.Sequential(*conv_downsamples))
                 transition_layers.append(nn.Sequential(*conv_downsamples))
         return nn.LayerList(transition_layers)
         return nn.LayerList(transition_layers)

+ 2 - 9
paddlex/ppdet/modeling/backbones/mobilenet_v1.py

@@ -59,16 +59,9 @@ class ConvBNLayer(nn.Layer):
 
 
         param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(
                 out_channels, weight_attr=param_attr, bias_attr=bias_attr)
                 out_channels, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels,
-                act=None,
-                param_attr=param_attr,
-                bias_attr=bias_attr,
-                use_global_stats=False)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self._conv(x)
         x = self._conv(x)

+ 4 - 8
paddlex/ppdet/modeling/backbones/mobilenet_v3.py

@@ -74,15 +74,11 @@ class ConvBNLayer(nn.Layer):
             learning_rate=norm_lr,
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)
             trainable=False if freeze_norm else True)
-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.bn = nn.SyncBatchNorm(
-                out_c, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.bn = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.bn = nn.BatchNorm2D(
                 out_c,
                 out_c,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
                 use_global_stats=global_stats)
         norm_params = self.bn.parameters()
         norm_params = self.bn.parameters()

+ 4 - 8
paddlex/ppdet/modeling/backbones/resnet.py

@@ -100,15 +100,11 @@ class ConvNormLayer(nn.Layer):
             regularizer=L2Decay(norm_decay),
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)
             trainable=False if freeze_norm else True)
 
 
-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.norm = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.norm = nn.BatchNorm2D(
                 ch_out,
                 ch_out,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
                 use_global_stats=global_stats)
         norm_params = self.norm.parameters()
         norm_params = self.norm.parameters()

+ 10 - 5
paddlex/ppdet/modeling/backbones/shufflenet_v2.py

@@ -19,7 +19,8 @@ from __future__ import print_function
 import paddle
 import paddle
 import paddle.nn as nn
 import paddle.nn as nn
 from paddle import ParamAttr
 from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm2D
 from paddle.nn.initializer import KaimingNormal
 from paddle.nn.initializer import KaimingNormal
 from paddle.regularizer import L2Decay
 from paddle.regularizer import L2Decay
 
 
@@ -51,15 +52,19 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
             bias_attr=False)
 
 
-        self._batch_norm = BatchNorm(
+        self._batch_norm = BatchNorm2D(
             out_channels,
             out_channels,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            bias_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            act=act)
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        if act == "hard_swish":
+            act = 'hardswish'
+        self.act = act
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._conv(inputs)
         y = self._batch_norm(y)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y
         return y
 
 
 
 

+ 6 - 1
paddlex/ppdet/modeling/backbones/swin_transformer.py

@@ -493,8 +493,13 @@ class BasicLayer(nn.Layer):
         cnt = 0
         cnt = 0
         for h in h_slices:
         for h in h_slices:
             for w in w_slices:
             for w in w_slices:
-                img_mask[:, h, w, :] = cnt
+                try:
+                    img_mask[:, h, w, :] = cnt
+                except:
+                    pass
+
                 cnt += 1
                 cnt += 1
+
         mask_windows = window_partition(
         mask_windows = window_partition(
             img_mask, self.window_size)  # nW, window_size, window_size, 1
             img_mask, self.window_size)  # nW, window_size, window_size, 1
         mask_windows = mask_windows.reshape(
         mask_windows = mask_windows.reshape(

+ 1 - 4
paddlex/ppdet/modeling/layers.py

@@ -176,12 +176,9 @@ class ConvNormLayer(nn.Layer):
             learning_rate=norm_lr,
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay)
             regularizer=L2Decay(norm_decay)
             if norm_decay is not None else None)
             if norm_decay is not None else None)
-        if norm_type == 'bn':
+        if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        elif norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
         elif norm_type == 'gn':
         elif norm_type == 'gn':
             self.norm = nn.GroupNorm(
             self.norm = nn.GroupNorm(
                 num_groups=norm_groups,
                 num_groups=norm_groups,

+ 0 - 2
paddlex/ppdet/modeling/losses/__init__.py

@@ -25,7 +25,6 @@ from . import fairmot_loss
 from . import gfocal_loss
 from . import gfocal_loss
 from . import detr_loss
 from . import detr_loss
 from . import sparsercnn_loss
 from . import sparsercnn_loss
-from . import varifocal_loss
 
 
 from .yolo_loss import *
 from .yolo_loss import *
 from .iou_aware_loss import *
 from .iou_aware_loss import *
@@ -40,4 +39,3 @@ from .fairmot_loss import *
 from .gfocal_loss import *
 from .gfocal_loss import *
 from .detr_loss import *
 from .detr_loss import *
 from .sparsercnn_loss import *
 from .sparsercnn_loss import *
-from .varifocal_loss import VarifocalLoss

+ 15 - 19
paddlex/ppdet/modeling/mot/tracker/base_jde_tracker.py

@@ -102,31 +102,26 @@ class BaseTrack(object):
 @register
 @register
 @serializable
 @serializable
 class STrack(BaseTrack):
 class STrack(BaseTrack):
-    def __init__(self,
-                 tlwh,
-                 score,
-                 temp_feat,
-                 num_classes,
-                 cls_id,
-                 buff_size=30):
-        # object class id
-        self.cls_id = cls_id
+    def __init__(self, tlwh, score, cls_id, buff_size=30, temp_feat=None):
         # wait activate
         # wait activate
         self._tlwh = np.asarray(tlwh, dtype=np.float)
         self._tlwh = np.asarray(tlwh, dtype=np.float)
+        self.score = score
+        self.cls_id = cls_id
+        self.track_len = 0
+
         self.kalman_filter = None
         self.kalman_filter = None
         self.mean, self.covariance = None, None
         self.mean, self.covariance = None, None
         self.is_activated = False
         self.is_activated = False
 
 
-        self.score = score
-        self.track_len = 0
-
-        self.smooth_feat = None
-        self.update_features(temp_feat)
-        self.features = deque([], maxlen=buff_size)
-        self.alpha = 0.9
+        self.use_reid = True if temp_feat is not None else False
+        if self.use_reid:
+            self.smooth_feat = None
+            self.update_features(temp_feat)
+            self.features = deque([], maxlen=buff_size)
+            self.alpha = 0.9
 
 
     def update_features(self, feat):
     def update_features(self, feat):
-        # L2 normalizing
+        # L2 normalizing, this function has no use for BYTETracker
         feat /= np.linalg.norm(feat)
         feat /= np.linalg.norm(feat)
         self.curr_feat = feat
         self.curr_feat = feat
         if self.smooth_feat is None:
         if self.smooth_feat is None:
@@ -182,7 +177,8 @@ class STrack(BaseTrack):
     def re_activate(self, new_track, frame_id, new_id=False):
     def re_activate(self, new_track, frame_id, new_id=False):
         self.mean, self.covariance = self.kalman_filter.update(
         self.mean, self.covariance = self.kalman_filter.update(
             self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
             self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
-        self.update_features(new_track.curr_feat)
+        if self.use_reid:
+            self.update_features(new_track.curr_feat)
         self.track_len = 0
         self.track_len = 0
         self.state = TrackState.Tracked
         self.state = TrackState.Tracked
         self.is_activated = True
         self.is_activated = True
@@ -201,7 +197,7 @@ class STrack(BaseTrack):
         self.is_activated = True  # set flag 'activated'
         self.is_activated = True  # set flag 'activated'
 
 
         self.score = new_track.score
         self.score = new_track.score
-        if update_feature:
+        if update_feature and self.use_reid:
             self.update_features(new_track.curr_feat)
             self.update_features(new_track.curr_feat)
 
 
     @property
     @property

+ 85 - 27
paddlex/ppdet/modeling/mot/tracker/jde_tracker.py

@@ -58,6 +58,7 @@ class JDETracker(object):
     """
     """
 
 
     def __init__(self,
     def __init__(self,
+                 use_byte=False,
                  num_classes=1,
                  num_classes=1,
                  det_thresh=0.3,
                  det_thresh=0.3,
                  track_buffer=30,
                  track_buffer=30,
@@ -66,11 +67,14 @@ class JDETracker(object):
                  tracked_thresh=0.7,
                  tracked_thresh=0.7,
                  r_tracked_thresh=0.5,
                  r_tracked_thresh=0.5,
                  unconfirmed_thresh=0.7,
                  unconfirmed_thresh=0.7,
-                 motion='KalmanFilter',
                  conf_thres=0,
                  conf_thres=0,
+                 match_thres=0.8,
+                 low_conf_thres=0.2,
+                 motion='KalmanFilter',
                  metric_type='euclidean'):
                  metric_type='euclidean'):
+        self.use_byte = use_byte
         self.num_classes = num_classes
         self.num_classes = num_classes
-        self.det_thresh = det_thresh
+        self.det_thresh = det_thresh if not use_byte else conf_thres + 0.1
         self.track_buffer = track_buffer
         self.track_buffer = track_buffer
         self.min_box_area = min_box_area
         self.min_box_area = min_box_area
         self.vertical_ratio = vertical_ratio
         self.vertical_ratio = vertical_ratio
@@ -78,9 +82,12 @@ class JDETracker(object):
         self.tracked_thresh = tracked_thresh
         self.tracked_thresh = tracked_thresh
         self.r_tracked_thresh = r_tracked_thresh
         self.r_tracked_thresh = r_tracked_thresh
         self.unconfirmed_thresh = unconfirmed_thresh
         self.unconfirmed_thresh = unconfirmed_thresh
+        self.conf_thres = conf_thres
+        self.match_thres = match_thres
+        self.low_conf_thres = low_conf_thres
+
         if motion == 'KalmanFilter':
         if motion == 'KalmanFilter':
             self.motion = KalmanFilter()
             self.motion = KalmanFilter()
-        self.conf_thres = conf_thres
         self.metric_type = metric_type
         self.metric_type = metric_type
 
 
         self.frame_id = 0
         self.frame_id = 0
@@ -91,7 +98,7 @@ class JDETracker(object):
         self.max_time_lost = 0
         self.max_time_lost = 0
         # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
         # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
 
 
-    def update(self, pred_dets, pred_embs):
+    def update(self, pred_dets, pred_embs=None):
         """
         """
         Processes the image frame and finds bounding box(detections).
         Processes the image frame and finds bounding box(detections).
         Associates the detection with corresponding tracklets and also handles
         Associates the detection with corresponding tracklets and also handles
@@ -123,7 +130,10 @@ class JDETracker(object):
         for cls_id in range(self.num_classes):
         for cls_id in range(self.num_classes):
             cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
             cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
             pred_dets_dict[cls_id] = pred_dets[cls_idx]
             pred_dets_dict[cls_id] = pred_dets[cls_idx]
-            pred_embs_dict[cls_id] = pred_embs[cls_idx]
+            if pred_embs is not None:
+                pred_embs_dict[cls_id] = pred_embs[cls_idx]
+            else:
+                pred_embs_dict[cls_id] = None
 
 
         for cls_id in range(self.num_classes):
         for cls_id in range(self.num_classes):
             """ Step 1: Get detections by class"""
             """ Step 1: Get detections by class"""
@@ -132,13 +142,24 @@ class JDETracker(object):
             remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
             remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
             if remain_inds.sum() > 0:
             if remain_inds.sum() > 0:
                 pred_dets_cls = pred_dets_cls[remain_inds]
                 pred_dets_cls = pred_dets_cls[remain_inds]
-                pred_embs_cls = pred_embs_cls[remain_inds]
-                detections = [
-                    STrack(
-                        STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
-                        self.num_classes, cls_id, 30)
-                    for (tlbrs, f) in zip(pred_dets_cls, pred_embs_cls)
-                ]
+                if self.use_byte:
+                    detections = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]),
+                            tlbrs[4],
+                            cls_id,
+                            30,
+                            temp_feat=None) for tlbrs in pred_dets_cls
+                    ]
+                else:
+                    pred_embs_cls = pred_embs_cls[remain_inds]
+                    detections = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id,
+                            30, temp_feat)
+                        for (tlbrs, temp_feat
+                             ) in zip(pred_dets_cls, pred_embs_cls)
+                    ]
             else:
             else:
                 detections = []
                 detections = []
             ''' Add newly detected tracklets to tracked_stracks'''
             ''' Add newly detected tracklets to tracked_stracks'''
@@ -160,12 +181,20 @@ class JDETracker(object):
             # Predict the current location with KalmanFilter
             # Predict the current location with KalmanFilter
             STrack.multi_predict(track_pool_dict[cls_id], self.motion)
             STrack.multi_predict(track_pool_dict[cls_id], self.motion)
 
 
-            dists = matching.embedding_distance(
-                track_pool_dict[cls_id], detections, metric=self.metric_type)
-            dists = matching.fuse_motion(self.motion, dists,
-                                         track_pool_dict[cls_id], detections)
-            matches, u_track, u_detection = matching.linear_assignment(
-                dists, thresh=self.tracked_thresh)
+            if self.use_byte:
+                dists = matching.iou_distance(track_pool_dict[cls_id],
+                                              detections)
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.match_thres)  #
+            else:
+                dists = matching.embedding_distance(
+                    track_pool_dict[cls_id],
+                    detections,
+                    metric=self.metric_type)
+                dists = matching.fuse_motion(
+                    self.motion, dists, track_pool_dict[cls_id], detections)
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.tracked_thresh)
 
 
             for i_tracked, idet in matches:
             for i_tracked, idet in matches:
                 # i_tracked is the id of the track and idet is the detection
                 # i_tracked is the id of the track and idet is the detection
@@ -183,19 +212,48 @@ class JDETracker(object):
 
 
             # None of the steps below happen if there are no undetected tracks.
             # None of the steps below happen if there are no undetected tracks.
             """ Step 3: Second association, with IOU"""
             """ Step 3: Second association, with IOU"""
-            detections = [detections[i] for i in u_detection]
-            r_tracked_stracks = []
-            for i in u_track:
-                if track_pool_dict[cls_id][i].state == TrackState.Tracked:
-                    r_tracked_stracks.append(track_pool_dict[cls_id][i])
+            if self.use_byte:
+                inds_low = pred_dets_dict[cls_id][:, 4:5] > self.low_conf_thres
+                inds_high = pred_dets_dict[cls_id][:, 4:5] < self.conf_thres
+                inds_second = np.logical_and(inds_low, inds_high).squeeze(-1)
+                pred_dets_cls_second = pred_dets_dict[cls_id][inds_second]
 
 
-            dists = matching.iou_distance(r_tracked_stracks, detections)
-            matches, u_track, u_detection = matching.linear_assignment(
-                dists, thresh=self.r_tracked_thresh)
+                # association the untrack to the low score detections
+                if len(pred_dets_cls_second) > 0:
+                    detections_second = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]),
+                            tlbrs[4],
+                            cls_id,
+                            30,
+                            temp_feat=None)
+                        for tlbrs in pred_dets_cls_second[:, :5]
+                    ]
+                else:
+                    detections_second = []
+                r_tracked_stracks = [
+                    track_pool_dict[cls_id][i] for i in u_track
+                    if track_pool_dict[cls_id][i].state == TrackState.Tracked
+                ]
+                dists = matching.iou_distance(r_tracked_stracks,
+                                              detections_second)
+                matches, u_track, u_detection_second = matching.linear_assignment(
+                    dists, thresh=0.4)  # not r_tracked_thresh
+            else:
+                detections = [detections[i] for i in u_detection]
+                r_tracked_stracks = []
+                for i in u_track:
+                    if track_pool_dict[cls_id][i].state == TrackState.Tracked:
+                        r_tracked_stracks.append(track_pool_dict[cls_id][i])
+                dists = matching.iou_distance(r_tracked_stracks, detections)
+
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.r_tracked_thresh)
 
 
             for i_tracked, idet in matches:
             for i_tracked, idet in matches:
                 track = r_tracked_stracks[i_tracked]
                 track = r_tracked_stracks[i_tracked]
-                det = detections[idet]
+                det = detections[
+                    idet] if not self.use_byte else detections_second[idet]
                 if track.state == TrackState.Tracked:
                 if track.state == TrackState.Tracked:
                     track.update(det, self.frame_id)
                     track.update(det, self.frame_id)
                     activated_tracks_dict[cls_id].append(track)
                     activated_tracks_dict[cls_id].append(track)

+ 1 - 3
paddlex/ppdet/modeling/necks/bifpn.py

@@ -52,10 +52,8 @@ class SeparableConvLayer(nn.Layer):
         self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)
         self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)
 
 
         # norm type
         # norm type
-        if self.norm_type == 'bn':
+        if self.norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(self.out_channels)
             self.norm = nn.BatchNorm2D(self.out_channels)
-        elif self.norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(self.out_channels)
         elif self.norm_type == 'gn':
         elif self.norm_type == 'gn':
             self.norm = nn.GroupNorm(
             self.norm = nn.GroupNorm(
                 num_groups=self.norm_groups, num_channels=self.out_channels)
                 num_groups=self.norm_groups, num_channels=self.out_channels)

+ 2 - 5
paddlex/ppdet/modeling/necks/blazeface_fpn.py

@@ -54,11 +54,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)
             bias_attr=False)
 
 
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)
 
 
     def forward(self, x):
     def forward(self, x):
         x = self._conv(x)
         x = self._conv(x)

+ 6 - 9
paddlex/ppdet/modeling/ops.py

@@ -50,10 +50,6 @@ def batch_norm(ch,
                freeze_norm=False,
                freeze_norm=False,
                initializer=None,
                initializer=None,
                data_format='NCHW'):
                data_format='NCHW'):
-    if norm_type == 'sync_bn':
-        batch_norm = nn.SyncBatchNorm
-    else:
-        batch_norm = nn.BatchNorm2D
 
 
     norm_lr = 0. if freeze_norm else 1.
     norm_lr = 0. if freeze_norm else 1.
     weight_attr = ParamAttr(
     weight_attr = ParamAttr(
@@ -66,11 +62,12 @@ def batch_norm(ch,
         regularizer=L2Decay(norm_decay),
         regularizer=L2Decay(norm_decay),
         trainable=False if freeze_norm else True)
         trainable=False if freeze_norm else True)
 
 
-    norm_layer = batch_norm(
-        ch,
-        weight_attr=weight_attr,
-        bias_attr=bias_attr,
-        data_format=data_format)
+    if norm_type in ['sync_bn', 'bn']:
+        norm_layer = nn.BatchNorm2D(
+            ch,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            data_format=data_format)
 
 
     norm_params = norm_layer.parameters()
     norm_params = norm_layer.parameters()
     if freeze_norm:
     if freeze_norm:

+ 7 - 6
paddlex/ppdet/modeling/post_process.py

@@ -42,10 +42,6 @@ class BBoxPostProcess(nn.Layer):
         self.num_classes = num_classes
         self.num_classes = num_classes
         self.decode = decode
         self.decode = decode
         self.nms = nms
         self.nms = nms
-        self.fake_bboxes = paddle.to_tensor(
-            np.array(
-                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
-        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
 
 
     def forward(self, head_out, rois, im_shape, scale_factor):
     def forward(self, head_out, rois, im_shape, scale_factor):
         """
         """
@@ -94,11 +90,16 @@ class BBoxPostProcess(nn.Layer):
         bboxes_list = []
         bboxes_list = []
         bbox_num_list = []
         bbox_num_list = []
         id_start = 0
         id_start = 0
+        fake_bboxes = paddle.to_tensor(
+            np.array(
+                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
+        fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+
         # add fake bbox when output is empty for each batch
         # add fake bbox when output is empty for each batch
         for i in range(bbox_num.shape[0]):
         for i in range(bbox_num.shape[0]):
             if bbox_num[i] == 0:
             if bbox_num[i] == 0:
-                bboxes_i = self.fake_bboxes
-                bbox_num_i = self.fake_bbox_num
+                bboxes_i = fake_bboxes
+                bbox_num_i = fake_bbox_num
                 id_start += 1
                 id_start += 1
             else:
             else:
                 bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
                 bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]

+ 2 - 2
paddlex/ppdet/modeling/proposal_generator/target.py

@@ -52,8 +52,8 @@ def rpn_anchor_target(anchors,
             labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
             labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
         # Step3: make output
         # Step3: make output
         if gt_bbox.shape[0] == 0:
         if gt_bbox.shape[0] == 0:
-            matched_gt_boxes = paddle.zeros([0, 4])
-            tgt_delta = paddle.zeros([0, 4])
+            matched_gt_boxes = paddle.zeros([matches.shape[0], 4])
+            tgt_delta = paddle.zeros([matches.shape[0], 4])
         else:
         else:
             matched_gt_boxes = paddle.gather(gt_bbox, matches)
             matched_gt_boxes = paddle.gather(gt_bbox, matches)
             tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
             tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)

+ 3 - 3
paddlex/ppdet/modeling/reid/pplcnet_embedding.py

@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from paddle.nn.initializer import Normal, Constant
 from paddle import ParamAttr
 from paddle import ParamAttr
-from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
 from paddle.regularizer import L2Decay
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal, XavierNormal
 from paddle.nn.initializer import KaimingNormal, XavierNormal
 from paddlex.ppdet.core.workspace import register
 from paddlex.ppdet.core.workspace import register
@@ -76,9 +76,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
             bias_attr=False)
 
 
-        self.bn = BatchNorm(
+        self.bn = BatchNorm2D(
             num_filters,
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()
         self.hardswish = nn.Hardswish()
 
 

+ 4 - 2
paddlex/ppdet/modeling/reid/resnet.py

@@ -55,12 +55,14 @@ class ConvBNLayer(nn.Layer):
             bias_attr=False,
             bias_attr=False,
             data_format=data_format)
             data_format=data_format)
 
 
-        self._batch_norm = nn.BatchNorm(
-            num_filters, act=act, data_layout=data_format)
+        self._batch_norm = nn.BatchNorm2D(num_filters, data_layout=data_format)
+        self.act = act
 
 
     def forward(self, inputs):
     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._conv(inputs)
         y = self._batch_norm(y)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y
         return y
 
 
 
 

+ 4 - 2
paddlex/ppdet/optimizer.py

@@ -42,9 +42,10 @@ class CosineDecay(object):
             the max_iters is much larger than the warmup iter
             the max_iters is much larger than the warmup iter
     """
     """
 
 
-    def __init__(self, max_epochs=1000, use_warmup=True):
+    def __init__(self, max_epochs=1000, use_warmup=True, eta_min=0):
         self.max_epochs = max_epochs
         self.max_epochs = max_epochs
         self.use_warmup = use_warmup
         self.use_warmup = use_warmup
+        self.eta_min = eta_min
 
 
     def __call__(self,
     def __call__(self,
                  base_lr=None,
                  base_lr=None,
@@ -66,7 +67,8 @@ class CosineDecay(object):
                 value.append(decayed_lr)
                 value.append(decayed_lr)
             return optimizer.lr.PiecewiseDecay(boundary, value)
             return optimizer.lr.PiecewiseDecay(boundary, value)
 
 
-        return optimizer.lr.CosineAnnealingDecay(base_lr, T_max=max_iters)
+        return optimizer.lr.CosineAnnealingDecay(
+            base_lr, T_max=max_iters, eta_min=self.eta_min)
 
 
 
 
 @serializable
 @serializable

+ 8 - 4
paddlex/ppdet/utils/checkpoint.py

@@ -124,7 +124,7 @@ def match_state_dict(model_state_dict, weight_state_dict):
     weight_keys = sorted(weight_state_dict.keys())
     weight_keys = sorted(weight_state_dict.keys())
 
 
     def match(a, b):
     def match(a, b):
-        if a.startswith('backbone.res5'):
+        if b.startswith('backbone.res5'):
             # In Faster RCNN, res5 pretrained weights have prefix of backbone,
             # In Faster RCNN, res5 pretrained weights have prefix of backbone,
             # however, the corresponding model weights have difficult prefix,
             # however, the corresponding model weights have difficult prefix,
             # bbox_head.
             # bbox_head.
@@ -139,10 +139,14 @@ def match_state_dict(model_state_dict, weight_state_dict):
     max_id = match_matrix.argmax(1)
     max_id = match_matrix.argmax(1)
     max_len = match_matrix.max(1)
     max_len = match_matrix.max(1)
     max_id[max_len == 0] = -1
     max_id[max_len == 0] = -1
+
+    load_id = set(max_id)
+    load_id.discard(-1)
     not_load_weight_name = []
     not_load_weight_name = []
-    for match_idx in range(len(max_id)):
-        if match_idx < len(weight_keys) and max_id[match_idx] == -1:
-            not_load_weight_name.append(weight_keys[match_idx])
+    for idx in range(len(weight_keys)):
+        if idx not in load_id:
+            not_load_weight_name.append(weight_keys[idx])
+
     if len(not_load_weight_name) > 0:
     if len(not_load_weight_name) > 0:
         logger.info('{} in pretrained weight is not used in the model, '
         logger.info('{} in pretrained weight is not used in the model, '
                     'and its will not be loaded'.format(not_load_weight_name))
                     'and its will not be loaded'.format(not_load_weight_name))