Browse Source

Merge pull request #1321 from will-jl944/develop_jf

Training rcnn model w/ negative sample supports bs>1
FlyingQianMM 3 years ago
parent
commit
25d48a59e6

+ 1 - 1
PaddleDetection

@@ -1 +1 @@
-Subproject commit 60674617b7bfff187e6eefd432e826c8d801fdad
+Subproject commit 692d732994660ceba82c75034c802eb1138239cf

+ 14 - 54
paddlex/cv/models/detector.py

@@ -1352,42 +1352,22 @@ class FasterRCNN(BaseDetector):
         """
         if train_dataset.pos_num < len(train_dataset.file_list):
             train_dataset.num_workers = 0
-            if train_batch_size != 1:
-                train_batch_size = 1
-                logging.warning(
-                    "Training RCNN models with negative samples only support batch size equals to 1 "
-                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
-                )
-            nranks = paddle.distributed.get_world_size()
-            local_rank = paddle.distributed.get_rank()
-            # single card training
-            if nranks < 2 or local_rank == 0:
-                super(FasterRCNN, self).train(
-                    num_epochs, train_dataset, train_batch_size, eval_dataset,
-                    optimizer, save_interval_epochs, log_interval_steps,
-                    save_dir, pretrain_weights, learning_rate, warmup_steps,
-                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
-                    use_ema, early_stop, early_stop_patience, use_vdl,
-                    resume_checkpoint)
-        else:
-            super(FasterRCNN, self).train(
-                num_epochs, train_dataset, train_batch_size, eval_dataset,
-                optimizer, save_interval_epochs, log_interval_steps, save_dir,
-                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
-                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
-                early_stop_patience, use_vdl, resume_checkpoint)
+        super(FasterRCNN, self).train(
+            num_epochs, train_dataset, train_batch_size, eval_dataset,
+            optimizer, save_interval_epochs, log_interval_steps, save_dir,
+            pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+            lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+            early_stop_patience, use_vdl, resume_checkpoint)
 
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
-            collate_batch = False
         else:
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
-            collate_batch = True
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
@@ -1400,7 +1380,7 @@ class FasterRCNN(BaseDetector):
 
         batch_transforms = BatchCompose(
             custom_batch_transforms + default_batch_transforms,
-            collate_batch=collate_batch)
+            collate_batch=False)
 
         return batch_transforms
 
@@ -2200,42 +2180,22 @@ class MaskRCNN(BaseDetector):
         """
         if train_dataset.pos_num < len(train_dataset.file_list):
             train_dataset.num_workers = 0
-            if train_batch_size != 1:
-                train_batch_size = 1
-                logging.warning(
-                    "Training RCNN models with negative samples only support batch size equals to 1 "
-                    "on a single gpu/cpu card, `train_batch_size` is forcibly set to 1."
-                )
-            nranks = paddle.distributed.get_world_size()
-            local_rank = paddle.distributed.get_rank()
-            # single card training
-            if nranks < 2 or local_rank == 0:
-                super(MaskRCNN, self).train(
-                    num_epochs, train_dataset, train_batch_size, eval_dataset,
-                    optimizer, save_interval_epochs, log_interval_steps,
-                    save_dir, pretrain_weights, learning_rate, warmup_steps,
-                    warmup_start_lr, lr_decay_epochs, lr_decay_gamma, metric,
-                    use_ema, early_stop, early_stop_patience, use_vdl,
-                    resume_checkpoint)
-        else:
-            super(MaskRCNN, self).train(
-                num_epochs, train_dataset, train_batch_size, eval_dataset,
-                optimizer, save_interval_epochs, log_interval_steps, save_dir,
-                pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
-                lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
-                early_stop_patience, use_vdl, resume_checkpoint)
+        super(MaskRCNN, self).train(
+            num_epochs, train_dataset, train_batch_size, eval_dataset,
+            optimizer, save_interval_epochs, log_interval_steps, save_dir,
+            pretrain_weights, learning_rate, warmup_steps, warmup_start_lr,
+            lr_decay_epochs, lr_decay_gamma, metric, use_ema, early_stop,
+            early_stop_patience, use_vdl, resume_checkpoint)
 
     def _compose_batch_transform(self, transforms, mode='train'):
         if mode == 'train':
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
-            collate_batch = False
         else:
             default_batch_transforms = [
                 _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
             ]
-            collate_batch = True
         custom_batch_transforms = []
         for i, op in enumerate(transforms.transforms):
             if isinstance(op, (BatchRandomResize, BatchRandomResizeByShort)):
@@ -2248,7 +2208,7 @@ class MaskRCNN(BaseDetector):
 
         batch_transforms = BatchCompose(
             custom_batch_transforms + default_batch_transforms,
-            collate_batch=collate_batch)
+            collate_batch=False)
 
         return batch_transforms
 

+ 24 - 5
paddlex/ppdet/engine/trainer.py

@@ -33,6 +33,7 @@ from paddle.static import InputSpec
 from paddlex.ppdet.optimizer import ModelEMA
 
 from paddlex.ppdet.core.workspace import create
+from paddlex.ppdet.modeling.architectures.meta_arch import BaseArch
 from paddlex.ppdet.utils.checkpoint import load_weight, load_pretrain_weight
 from paddlex.ppdet.utils.visualizer import visualize_results, save_result
 from paddlex.ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_infer_results, KeyPointTopDownCOCOEval, KeyPointTopDownMPIIEval
@@ -111,8 +112,12 @@ class Trainer(object):
         if self.mode == 'eval':
             self._eval_batch_sampler = paddle.io.BatchSampler(
                 self.dataset, batch_size=self.cfg.EvalReader['batch_size'])
-            self.loader = create('{}Reader'.format(self.mode.capitalize()))(
-                self.dataset, cfg.worker_num, self._eval_batch_sampler)
+            reader_name = '{}Reader'.format(self.mode.capitalize())
+            # If metric is VOC, need to be set collate_batch=False.
+            if cfg.metric == 'VOC':
+                cfg[reader_name]['collate_batch'] = False
+            self.loader = create(reader_name)(self.dataset, cfg.worker_num,
+                                              self._eval_batch_sampler)
         # TestDataset build after user set images, skip loader creation here
 
         # build optimizer in train mode
@@ -336,6 +341,12 @@ class Trainer(object):
         assert self.mode == 'train', "Model not in 'train' mode"
         Init_mark = False
 
+        sync_bn = (
+            getattr(self.cfg, 'norm_type', None) in [None, 'sync_bn'] and
+            self.cfg.use_gpu and self._nranks > 1)
+        if sync_bn:
+            self.model = BaseArch.convert_sync_batchnorm(self.model)
+
         model = self.model
         if self.cfg.get('fleet', False):
             model = fleet.distributed_model(model)
@@ -364,7 +375,9 @@ class Trainer(object):
         self.status['training_staus'] = stats.TrainingStats(self.cfg.log_iter)
 
         if self.cfg.get('print_flops', False):
-            self._flops(self.loader)
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num)
+            self._flops(flops_loader)
         profiler_options = self.cfg.get('profiler_options', None)
 
         self._compose_callback.on_train_begin(self.status)
@@ -436,6 +449,9 @@ class Trainer(object):
                         paddle.io.BatchSampler(
                             self._eval_dataset,
                             batch_size=self.cfg.EvalReader['batch_size'])
+                    # If metric is VOC, need to be set collate_batch=False.
+                    if self.cfg.metric == 'VOC':
+                        self.cfg['EvalReader']['collate_batch'] = False
                     self._eval_loader = create('EvalReader')(
                         self._eval_dataset,
                         self.cfg.worker_num,
@@ -463,7 +479,9 @@ class Trainer(object):
         self.status['mode'] = 'eval'
         self.model.eval()
         if self.cfg.get('print_flops', False):
-            self._flops(loader)
+            flops_loader = create('{}Reader'.format(self.mode.capitalize()))(
+                self.dataset, self.cfg.worker_num, self._eval_batch_sampler)
+            self._flops(flops_loader)
         for step_id, data in enumerate(loader):
             self.status['step_id'] = step_id
             self._compose_callback.on_step_begin(self.status)
@@ -514,7 +532,8 @@ class Trainer(object):
         self.status['mode'] = 'test'
         self.model.eval()
         if self.cfg.get('print_flops', False):
-            self._flops(loader)
+            flops_loader = create('TestReader')(self.dataset, 0)
+            self._flops(flops_loader)
         results = []
         for step_id, data in enumerate(loader):
             self.status['step_id'] = step_id

+ 13 - 0
paddlex/ppdet/modeling/architectures/meta_arch.py

@@ -126,3 +126,16 @@ class BaseArch(nn.Layer):
 
     def get_pred(self, ):
         raise NotImplementedError("Should implement get_pred method!")
+
+    @classmethod
+    def convert_sync_batchnorm(cls, layer):
+        layer_output = layer
+        if getattr(layer, 'norm_type', None) == 'sync_bn':
+            layer_output = nn.SyncBatchNorm.convert_sync_batchnorm(layer)
+        else:
+            for name, sublayer in layer.named_children():
+                layer_output.add_sublayer(name,
+                                          cls.convert_sync_batchnorm(sublayer))
+
+        del layer
+        return layer_output

+ 2 - 5
paddlex/ppdet/modeling/backbones/blazenet.py

@@ -58,11 +58,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)
 
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['bn', 'sync_bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)
 
     def forward(self, x):
         x = self._conv(x)

+ 1 - 1
paddlex/ppdet/modeling/backbones/esnet.py

@@ -20,7 +20,7 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D
 from paddle.nn.initializer import KaimingNormal
 from paddle.regularizer import L2Decay
 

+ 3 - 3
paddlex/ppdet/modeling/backbones/hrnet.py

@@ -62,11 +62,11 @@ class ConvNormLayer(nn.Layer):
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
             learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-        global_stats = True if freeze_norm else False
+        global_stats = True if freeze_norm else None
         if norm_type in ['bn', 'sync_bn']:
-            self.norm = nn.BatchNorm(
+            self.norm = nn.BatchNorm2D(
                 ch_out,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         elif norm_type == 'gn':

+ 3 - 3
paddlex/ppdet/modeling/backbones/lcnet.py

@@ -19,7 +19,7 @@ from __future__ import print_function
 import paddle
 import paddle.nn as nn
 from paddle import ParamAttr
-from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear
+from paddle.nn import AdaptiveAvgPool2D, Conv2D
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal
 
@@ -81,9 +81,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
 
-        self.bn = BatchNorm(
+        self.bn = nn.BatchNorm2D(
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()
 

+ 14 - 14
paddlex/ppdet/modeling/backbones/lite_hrnet.py

@@ -56,11 +56,11 @@ class ConvNormLayer(nn.Layer):
                 regularizer=L2Decay(norm_decay), )
             bias_attr = ParamAttr(
                 learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
-            global_stats = True if freeze_norm else False
+            global_stats = True if freeze_norm else None
             if norm_type in ['bn', 'sync_bn']:
-                self.norm = nn.BatchNorm(
+                self.norm = nn.BatchNorm2D(
                     ch_out,
-                    param_attr=param_attr,
+                    weight_attr=param_attr,
                     bias_attr=bias_attr,
                     use_global_stats=global_stats, )
             elif norm_type == 'gn':
@@ -582,7 +582,7 @@ class LiteHRNetModule(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(self.in_channels[i]),
+                            nn.BatchNorm2D(self.in_channels[i]),
                             nn.Upsample(
                                 scale_factor=2**(j - i), mode='nearest')))
                 elif j == i:
@@ -601,7 +601,7 @@ class LiteHRNetModule(nn.Layer):
                                         padding=1,
                                         groups=self.in_channels[j],
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     L.Conv2d(
                                         self.in_channels[j],
                                         self.in_channels[i],
@@ -609,7 +609,7 @@ class LiteHRNetModule(nn.Layer):
                                         stride=1,
                                         padding=0,
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[i])))
+                                    nn.BatchNorm2D(self.in_channels[i])))
                         else:
                             conv_downsamples.append(
                                 nn.Sequential(
@@ -621,7 +621,7 @@ class LiteHRNetModule(nn.Layer):
                                         padding=1,
                                         groups=self.in_channels[j],
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     L.Conv2d(
                                         self.in_channels[j],
                                         self.in_channels[j],
@@ -629,7 +629,7 @@ class LiteHRNetModule(nn.Layer):
                                         stride=1,
                                         padding=0,
                                         bias=False, ),
-                                    nn.BatchNorm(self.in_channels[j]),
+                                    nn.BatchNorm2D(self.in_channels[j]),
                                     nn.ReLU()))
 
                     fuse_layer.append(nn.Sequential(*conv_downsamples))
@@ -777,7 +777,7 @@ class LiteHRNet(nn.Layer):
                                 padding=1,
                                 groups=num_channels_pre_layer[i],
                                 bias=False),
-                            nn.BatchNorm(num_channels_pre_layer[i]),
+                            nn.BatchNorm2D(num_channels_pre_layer[i]),
                             L.Conv2d(
                                 num_channels_pre_layer[i],
                                 num_channels_cur_layer[i],
@@ -785,7 +785,7 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]),
                             nn.ReLU()))
                 else:
                     transition_layers.append(None)
@@ -802,7 +802,7 @@ class LiteHRNet(nn.Layer):
                                 stride=2,
                                 padding=1,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_pre_layer[-1]),
                             L.Conv2d(
                                 num_channels_pre_layer[-1],
                                 num_channels_cur_layer[i]
@@ -812,9 +812,9 @@ class LiteHRNet(nn.Layer):
                                 stride=1,
                                 padding=0,
                                 bias=False, ),
-                            nn.BatchNorm(num_channels_cur_layer[i]
-                                         if j == i - num_branches_pre else
-                                         num_channels_pre_layer[-1]),
+                            nn.BatchNorm2D(num_channels_cur_layer[i]
+                                           if j == i - num_branches_pre else
+                                           num_channels_pre_layer[-1]),
                             nn.ReLU()))
                 transition_layers.append(nn.Sequential(*conv_downsamples))
         return nn.LayerList(transition_layers)

+ 2 - 9
paddlex/ppdet/modeling/backbones/mobilenet_v1.py

@@ -59,16 +59,9 @@ class ConvBNLayer(nn.Layer):
 
         param_attr = ParamAttr(regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(regularizer=L2Decay(norm_decay))
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(
                 out_channels, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels,
-                act=None,
-                param_attr=param_attr,
-                bias_attr=bias_attr,
-                use_global_stats=False)
 
     def forward(self, x):
         x = self._conv(x)

+ 4 - 8
paddlex/ppdet/modeling/backbones/mobilenet_v3.py

@@ -74,15 +74,11 @@ class ConvBNLayer(nn.Layer):
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)
-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.bn = nn.SyncBatchNorm(
-                out_c, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.bn = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.bn = nn.BatchNorm2D(
                 out_c,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         norm_params = self.bn.parameters()

+ 4 - 8
paddlex/ppdet/modeling/backbones/resnet.py

@@ -100,15 +100,11 @@ class ConvNormLayer(nn.Layer):
             regularizer=L2Decay(norm_decay),
             trainable=False if freeze_norm else True)
 
-        global_stats = True if freeze_norm else False
-        if norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        else:
-            self.norm = nn.BatchNorm(
+        global_stats = True if freeze_norm else None
+        if norm_type in ['sync_bn', 'bn']:
+            self.norm = nn.BatchNorm2D(
                 ch_out,
-                act=None,
-                param_attr=param_attr,
+                weight_attr=param_attr,
                 bias_attr=bias_attr,
                 use_global_stats=global_stats)
         norm_params = self.norm.parameters()

+ 10 - 5
paddlex/ppdet/modeling/backbones/shufflenet_v2.py

@@ -19,7 +19,8 @@ from __future__ import print_function
 import paddle
 import paddle.nn as nn
 from paddle import ParamAttr
-from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, MaxPool2D, AdaptiveAvgPool2D, BatchNorm2D
 from paddle.nn.initializer import KaimingNormal
 from paddle.regularizer import L2Decay
 
@@ -51,15 +52,19 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
 
-        self._batch_norm = BatchNorm(
+        self._batch_norm = BatchNorm2D(
             out_channels,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            bias_attr=ParamAttr(regularizer=L2Decay(0.0)),
-            act=act)
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        if act == "hard_swish":
+            act = 'hardswish'
+        self.act = act
 
     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y
 
 

+ 6 - 1
paddlex/ppdet/modeling/backbones/swin_transformer.py

@@ -493,8 +493,13 @@ class BasicLayer(nn.Layer):
         cnt = 0
         for h in h_slices:
             for w in w_slices:
-                img_mask[:, h, w, :] = cnt
+                try:
+                    img_mask[:, h, w, :] = cnt
+                except:
+                    pass
+
                 cnt += 1
+
         mask_windows = window_partition(
             img_mask, self.window_size)  # nW, window_size, window_size, 1
         mask_windows = mask_windows.reshape(

+ 1 - 4
paddlex/ppdet/modeling/layers.py

@@ -176,12 +176,9 @@ class ConvNormLayer(nn.Layer):
             learning_rate=norm_lr,
             regularizer=L2Decay(norm_decay)
             if norm_decay is not None else None)
-        if norm_type == 'bn':
+        if norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
-        elif norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(
-                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
         elif norm_type == 'gn':
             self.norm = nn.GroupNorm(
                 num_groups=norm_groups,

+ 0 - 2
paddlex/ppdet/modeling/losses/__init__.py

@@ -25,7 +25,6 @@ from . import fairmot_loss
 from . import gfocal_loss
 from . import detr_loss
 from . import sparsercnn_loss
-from . import varifocal_loss
 
 from .yolo_loss import *
 from .iou_aware_loss import *
@@ -40,4 +39,3 @@ from .fairmot_loss import *
 from .gfocal_loss import *
 from .detr_loss import *
 from .sparsercnn_loss import *
-from .varifocal_loss import VarifocalLoss

+ 15 - 19
paddlex/ppdet/modeling/mot/tracker/base_jde_tracker.py

@@ -102,31 +102,26 @@ class BaseTrack(object):
 @register
 @serializable
 class STrack(BaseTrack):
-    def __init__(self,
-                 tlwh,
-                 score,
-                 temp_feat,
-                 num_classes,
-                 cls_id,
-                 buff_size=30):
-        # object class id
-        self.cls_id = cls_id
+    def __init__(self, tlwh, score, cls_id, buff_size=30, temp_feat=None):
         # wait activate
         self._tlwh = np.asarray(tlwh, dtype=np.float)
+        self.score = score
+        self.cls_id = cls_id
+        self.track_len = 0
+
         self.kalman_filter = None
         self.mean, self.covariance = None, None
         self.is_activated = False
 
-        self.score = score
-        self.track_len = 0
-
-        self.smooth_feat = None
-        self.update_features(temp_feat)
-        self.features = deque([], maxlen=buff_size)
-        self.alpha = 0.9
+        self.use_reid = True if temp_feat is not None else False
+        if self.use_reid:
+            self.smooth_feat = None
+            self.update_features(temp_feat)
+            self.features = deque([], maxlen=buff_size)
+            self.alpha = 0.9
 
     def update_features(self, feat):
-        # L2 normalizing
+        # L2 normalizing, this function has no use for BYTETracker
         feat /= np.linalg.norm(feat)
         self.curr_feat = feat
         if self.smooth_feat is None:
@@ -182,7 +177,8 @@ class STrack(BaseTrack):
     def re_activate(self, new_track, frame_id, new_id=False):
         self.mean, self.covariance = self.kalman_filter.update(
             self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
-        self.update_features(new_track.curr_feat)
+        if self.use_reid:
+            self.update_features(new_track.curr_feat)
         self.track_len = 0
         self.state = TrackState.Tracked
         self.is_activated = True
@@ -201,7 +197,7 @@ class STrack(BaseTrack):
         self.is_activated = True  # set flag 'activated'
 
         self.score = new_track.score
-        if update_feature:
+        if update_feature and self.use_reid:
             self.update_features(new_track.curr_feat)
 
     @property

+ 85 - 27
paddlex/ppdet/modeling/mot/tracker/jde_tracker.py

@@ -58,6 +58,7 @@ class JDETracker(object):
     """
 
     def __init__(self,
+                 use_byte=False,
                  num_classes=1,
                  det_thresh=0.3,
                  track_buffer=30,
@@ -66,11 +67,14 @@ class JDETracker(object):
                  tracked_thresh=0.7,
                  r_tracked_thresh=0.5,
                  unconfirmed_thresh=0.7,
-                 motion='KalmanFilter',
                  conf_thres=0,
+                 match_thres=0.8,
+                 low_conf_thres=0.2,
+                 motion='KalmanFilter',
                  metric_type='euclidean'):
+        self.use_byte = use_byte
         self.num_classes = num_classes
-        self.det_thresh = det_thresh
+        self.det_thresh = det_thresh if not use_byte else conf_thres + 0.1
         self.track_buffer = track_buffer
         self.min_box_area = min_box_area
         self.vertical_ratio = vertical_ratio
@@ -78,9 +82,12 @@ class JDETracker(object):
         self.tracked_thresh = tracked_thresh
         self.r_tracked_thresh = r_tracked_thresh
         self.unconfirmed_thresh = unconfirmed_thresh
+        self.conf_thres = conf_thres
+        self.match_thres = match_thres
+        self.low_conf_thres = low_conf_thres
+
         if motion == 'KalmanFilter':
             self.motion = KalmanFilter()
-        self.conf_thres = conf_thres
         self.metric_type = metric_type
 
         self.frame_id = 0
@@ -91,7 +98,7 @@ class JDETracker(object):
         self.max_time_lost = 0
         # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
 
-    def update(self, pred_dets, pred_embs):
+    def update(self, pred_dets, pred_embs=None):
         """
         Processes the image frame and finds bounding box(detections).
         Associates the detection with corresponding tracklets and also handles
@@ -123,7 +130,10 @@ class JDETracker(object):
         for cls_id in range(self.num_classes):
             cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
             pred_dets_dict[cls_id] = pred_dets[cls_idx]
-            pred_embs_dict[cls_id] = pred_embs[cls_idx]
+            if pred_embs is not None:
+                pred_embs_dict[cls_id] = pred_embs[cls_idx]
+            else:
+                pred_embs_dict[cls_id] = None
 
         for cls_id in range(self.num_classes):
             """ Step 1: Get detections by class"""
@@ -132,13 +142,24 @@ class JDETracker(object):
             remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
             if remain_inds.sum() > 0:
                 pred_dets_cls = pred_dets_cls[remain_inds]
-                pred_embs_cls = pred_embs_cls[remain_inds]
-                detections = [
-                    STrack(
-                        STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
-                        self.num_classes, cls_id, 30)
-                    for (tlbrs, f) in zip(pred_dets_cls, pred_embs_cls)
-                ]
+                if self.use_byte:
+                    detections = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]),
+                            tlbrs[4],
+                            cls_id,
+                            30,
+                            temp_feat=None) for tlbrs in pred_dets_cls
+                    ]
+                else:
+                    pred_embs_cls = pred_embs_cls[remain_inds]
+                    detections = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], cls_id,
+                            30, temp_feat)
+                        for (tlbrs, temp_feat
+                             ) in zip(pred_dets_cls, pred_embs_cls)
+                    ]
             else:
                 detections = []
             ''' Add newly detected tracklets to tracked_stracks'''
@@ -160,12 +181,20 @@ class JDETracker(object):
             # Predict the current location with KalmanFilter
             STrack.multi_predict(track_pool_dict[cls_id], self.motion)
 
-            dists = matching.embedding_distance(
-                track_pool_dict[cls_id], detections, metric=self.metric_type)
-            dists = matching.fuse_motion(self.motion, dists,
-                                         track_pool_dict[cls_id], detections)
-            matches, u_track, u_detection = matching.linear_assignment(
-                dists, thresh=self.tracked_thresh)
+            if self.use_byte:
+                dists = matching.iou_distance(track_pool_dict[cls_id],
+                                              detections)
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.match_thres)  #
+            else:
+                dists = matching.embedding_distance(
+                    track_pool_dict[cls_id],
+                    detections,
+                    metric=self.metric_type)
+                dists = matching.fuse_motion(
+                    self.motion, dists, track_pool_dict[cls_id], detections)
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.tracked_thresh)
 
             for i_tracked, idet in matches:
                 # i_tracked is the id of the track and idet is the detection
@@ -183,19 +212,48 @@ class JDETracker(object):
 
             # None of the steps below happen if there are no undetected tracks.
             """ Step 3: Second association, with IOU"""
-            detections = [detections[i] for i in u_detection]
-            r_tracked_stracks = []
-            for i in u_track:
-                if track_pool_dict[cls_id][i].state == TrackState.Tracked:
-                    r_tracked_stracks.append(track_pool_dict[cls_id][i])
+            if self.use_byte:
+                inds_low = pred_dets_dict[cls_id][:, 4:5] > self.low_conf_thres
+                inds_high = pred_dets_dict[cls_id][:, 4:5] < self.conf_thres
+                inds_second = np.logical_and(inds_low, inds_high).squeeze(-1)
+                pred_dets_cls_second = pred_dets_dict[cls_id][inds_second]
 
-            dists = matching.iou_distance(r_tracked_stracks, detections)
-            matches, u_track, u_detection = matching.linear_assignment(
-                dists, thresh=self.r_tracked_thresh)
+                # associate the unmatched tracks with the low-score detections
+                if len(pred_dets_cls_second) > 0:
+                    detections_second = [
+                        STrack(
+                            STrack.tlbr_to_tlwh(tlbrs[:4]),
+                            tlbrs[4],
+                            cls_id,
+                            30,
+                            temp_feat=None)
+                        for tlbrs in pred_dets_cls_second[:, :5]
+                    ]
+                else:
+                    detections_second = []
+                r_tracked_stracks = [
+                    track_pool_dict[cls_id][i] for i in u_track
+                    if track_pool_dict[cls_id][i].state == TrackState.Tracked
+                ]
+                dists = matching.iou_distance(r_tracked_stracks,
+                                              detections_second)
+                matches, u_track, u_detection_second = matching.linear_assignment(
+                    dists, thresh=0.4)  # not r_tracked_thresh
+            else:
+                detections = [detections[i] for i in u_detection]
+                r_tracked_stracks = []
+                for i in u_track:
+                    if track_pool_dict[cls_id][i].state == TrackState.Tracked:
+                        r_tracked_stracks.append(track_pool_dict[cls_id][i])
+                dists = matching.iou_distance(r_tracked_stracks, detections)
+
+                matches, u_track, u_detection = matching.linear_assignment(
+                    dists, thresh=self.r_tracked_thresh)
 
             for i_tracked, idet in matches:
                 track = r_tracked_stracks[i_tracked]
-                det = detections[idet]
+                det = detections[
+                    idet] if not self.use_byte else detections_second[idet]
                 if track.state == TrackState.Tracked:
                     track.update(det, self.frame_id)
                     activated_tracks_dict[cls_id].append(track)

+ 1 - 3
paddlex/ppdet/modeling/necks/bifpn.py

@@ -52,10 +52,8 @@ class SeparableConvLayer(nn.Layer):
         self.pointwise_conv = nn.Conv2D(in_channels, self.out_channels, 1)
 
         # norm type
-        if self.norm_type == 'bn':
+        if self.norm_type in ['bn', 'sync_bn']:
             self.norm = nn.BatchNorm2D(self.out_channels)
-        elif self.norm_type == 'sync_bn':
-            self.norm = nn.SyncBatchNorm(self.out_channels)
         elif self.norm_type == 'gn':
             self.norm = nn.GroupNorm(
                 num_groups=self.norm_groups, num_channels=self.out_channels)

+ 2 - 5
paddlex/ppdet/modeling/necks/blazeface_fpn.py

@@ -54,11 +54,8 @@ class ConvBNLayer(nn.Layer):
                 learning_rate=conv_lr, initializer=KaimingNormal()),
             bias_attr=False)
 
-        if norm_type == 'sync_bn':
-            self._batch_norm = nn.SyncBatchNorm(out_channels)
-        else:
-            self._batch_norm = nn.BatchNorm(
-                out_channels, act=None, use_global_stats=False)
+        if norm_type in ['sync_bn', 'bn']:
+            self._batch_norm = nn.BatchNorm2D(out_channels)
 
     def forward(self, x):
         x = self._conv(x)

+ 6 - 9
paddlex/ppdet/modeling/ops.py

@@ -50,10 +50,6 @@ def batch_norm(ch,
                freeze_norm=False,
                initializer=None,
                data_format='NCHW'):
-    if norm_type == 'sync_bn':
-        batch_norm = nn.SyncBatchNorm
-    else:
-        batch_norm = nn.BatchNorm2D
 
     norm_lr = 0. if freeze_norm else 1.
     weight_attr = ParamAttr(
@@ -66,11 +62,12 @@ def batch_norm(ch,
         regularizer=L2Decay(norm_decay),
         trainable=False if freeze_norm else True)
 
-    norm_layer = batch_norm(
-        ch,
-        weight_attr=weight_attr,
-        bias_attr=bias_attr,
-        data_format=data_format)
+    if norm_type in ['sync_bn', 'bn']:
+        norm_layer = nn.BatchNorm2D(
+            ch,
+            weight_attr=weight_attr,
+            bias_attr=bias_attr,
+            data_format=data_format)
 
     norm_params = norm_layer.parameters()
     if freeze_norm:

+ 7 - 6
paddlex/ppdet/modeling/post_process.py

@@ -42,10 +42,6 @@ class BBoxPostProcess(nn.Layer):
         self.num_classes = num_classes
         self.decode = decode
         self.nms = nms
-        self.fake_bboxes = paddle.to_tensor(
-            np.array(
-                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
-        self.fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
 
     def forward(self, head_out, rois, im_shape, scale_factor):
         """
@@ -94,11 +90,16 @@ class BBoxPostProcess(nn.Layer):
         bboxes_list = []
         bbox_num_list = []
         id_start = 0
+        fake_bboxes = paddle.to_tensor(
+            np.array(
+                [[-1, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype='float32'))
+        fake_bbox_num = paddle.to_tensor(np.array([1], dtype='int32'))
+
         # add fake bbox when output is empty for each batch
         for i in range(bbox_num.shape[0]):
             if bbox_num[i] == 0:
-                bboxes_i = self.fake_bboxes
-                bbox_num_i = self.fake_bbox_num
+                bboxes_i = fake_bboxes
+                bbox_num_i = fake_bbox_num
                 id_start += 1
             else:
                 bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]

+ 2 - 2
paddlex/ppdet/modeling/proposal_generator/target.py

@@ -52,8 +52,8 @@ def rpn_anchor_target(anchors,
             labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
         # Step3: make output
         if gt_bbox.shape[0] == 0:
-            matched_gt_boxes = paddle.zeros([0, 4])
-            tgt_delta = paddle.zeros([0, 4])
+            matched_gt_boxes = paddle.zeros([matches.shape[0], 4])
+            tgt_delta = paddle.zeros([matches.shape[0], 4])
         else:
             matched_gt_boxes = paddle.gather(gt_bbox, matches)
             tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)

+ 3 - 3
paddlex/ppdet/modeling/reid/pplcnet_embedding.py

@@ -21,7 +21,7 @@ import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn.initializer import Normal, Constant
 from paddle import ParamAttr
-from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Linear
+from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Linear
 from paddle.regularizer import L2Decay
 from paddle.nn.initializer import KaimingNormal, XavierNormal
 from paddlex.ppdet.core.workspace import register
@@ -76,9 +76,9 @@ class ConvBNLayer(nn.Layer):
             weight_attr=ParamAttr(initializer=KaimingNormal()),
             bias_attr=False)
 
-        self.bn = BatchNorm(
+        self.bn = BatchNorm2D(
             num_filters,
-            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            weight_attr=ParamAttr(regularizer=L2Decay(0.0)),
             bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
         self.hardswish = nn.Hardswish()
 

+ 4 - 2
paddlex/ppdet/modeling/reid/resnet.py

@@ -55,12 +55,14 @@ class ConvBNLayer(nn.Layer):
             bias_attr=False,
             data_format=data_format)
 
-        self._batch_norm = nn.BatchNorm(
-            num_filters, act=act, data_layout=data_format)
+        self._batch_norm = nn.BatchNorm2D(num_filters, data_layout=data_format)
+        self.act = act
 
     def forward(self, inputs):
         y = self._conv(inputs)
         y = self._batch_norm(y)
+        if self.act:
+            y = getattr(F, self.act)(y)
         return y
 
 

+ 4 - 2
paddlex/ppdet/optimizer.py

@@ -42,9 +42,10 @@ class CosineDecay(object):
             the max_iters is much larger than the warmup iter
     """
 
-    def __init__(self, max_epochs=1000, use_warmup=True):
+    def __init__(self, max_epochs=1000, use_warmup=True, eta_min=0):
         self.max_epochs = max_epochs
         self.use_warmup = use_warmup
+        self.eta_min = eta_min
 
     def __call__(self,
                  base_lr=None,
@@ -66,7 +67,8 @@ class CosineDecay(object):
                 value.append(decayed_lr)
             return optimizer.lr.PiecewiseDecay(boundary, value)
 
-        return optimizer.lr.CosineAnnealingDecay(base_lr, T_max=max_iters)
+        return optimizer.lr.CosineAnnealingDecay(
+            base_lr, T_max=max_iters, eta_min=self.eta_min)
 
 
 @serializable

+ 8 - 4
paddlex/ppdet/utils/checkpoint.py

@@ -124,7 +124,7 @@ def match_state_dict(model_state_dict, weight_state_dict):
     weight_keys = sorted(weight_state_dict.keys())
 
     def match(a, b):
-        if a.startswith('backbone.res5'):
+        if b.startswith('backbone.res5'):
             # In Faster RCNN, res5 pretrained weights have prefix of backbone,
             # however, the corresponding model weights have difficult prefix,
             # bbox_head.
@@ -139,10 +139,14 @@ def match_state_dict(model_state_dict, weight_state_dict):
     max_id = match_matrix.argmax(1)
     max_len = match_matrix.max(1)
     max_id[max_len == 0] = -1
+
+    load_id = set(max_id)
+    load_id.discard(-1)
     not_load_weight_name = []
-    for match_idx in range(len(max_id)):
-        if match_idx < len(weight_keys) and max_id[match_idx] == -1:
-            not_load_weight_name.append(weight_keys[match_idx])
+    for idx in range(len(weight_keys)):
+        if idx not in load_id:
+            not_load_weight_name.append(weight_keys[idx])
+
     if len(not_load_weight_name) > 0:
         logger.info('{} in pretrained weight is not used in the model, '
                     'and its will not be loaded'.format(not_load_weight_name))