|
|
@@ -0,0 +1,301 @@
|
|
|
+# Runtime
|
|
|
+find_unused_parameters: True
|
|
|
+use_gpu: true
|
|
|
+use_xpu: false
|
|
|
+use_mlu: false
|
|
|
+use_npu: false
|
|
|
+log_iter: 20
|
|
|
+save_dir: output
|
|
|
+snapshot_epoch: 1
|
|
|
+print_flops: false
|
|
|
+print_params: false
|
|
|
+use_ema: true
|
|
|
+
|
|
|
+
|
|
|
+# Dataset
|
|
|
+metric: COCO
|
|
|
+num_classes: 80
|
|
|
+
|
|
|
+TrainDataset:
|
|
|
+ name: COCODataSet
|
|
|
+ image_dir: train2017
|
|
|
+ anno_path: annotations/instances_train2017.json
|
|
|
+ dataset_dir: dataset/coco
|
|
|
+ allow_empty: true
|
|
|
+ data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
|
|
|
+
|
|
|
+EvalDataset:
|
|
|
+ name: COCODataSet
|
|
|
+ image_dir: val2017
|
|
|
+ anno_path: annotations/instances_val2017.json
|
|
|
+ dataset_dir: dataset/coco
|
|
|
+ allow_empty: true
|
|
|
+
|
|
|
+TestDataset:
|
|
|
+ name: ImageFolder
|
|
|
+ anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
|
|
|
+ dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
|
|
|
+
|
|
|
+
|
|
|
+# Reader
|
|
|
+worker_num: 1
|
|
|
+TrainReader:
|
|
|
+ sample_transforms:
|
|
|
+ - Decode: {}
|
|
|
+ - RandomFlip: {prob: 0.5}
|
|
|
+ - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
|
|
|
+ transforms2: [
|
|
|
+ RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
|
|
|
+ RandomSizeCrop: { min_size: 384, max_size: 600 },
|
|
|
+ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
|
|
|
+ }
|
|
|
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
|
|
+ - Permute: {}
|
|
|
+ batch_transforms:
|
|
|
+ - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
|
|
|
+ batch_size: 1
|
|
|
+ shuffle: true
|
|
|
+ drop_last: true
|
|
|
+ collate_batch: false
|
|
|
+ use_shared_memory: false
|
|
|
+
|
|
|
+EvalReader:
|
|
|
+ sample_transforms:
|
|
|
+ - Decode: {}
|
|
|
+ - Resize: {target_size: [800, 1333], keep_ratio: True}
|
|
|
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
|
|
+ - Permute: {}
|
|
|
+ batch_size: 1
|
|
|
+ shuffle: false
|
|
|
+ drop_last: false
|
|
|
+
|
|
|
+TestReader:
|
|
|
+ inputs_def:
|
|
|
+ image_shape: [-1, 3, 640, 640]
|
|
|
+ sample_transforms:
|
|
|
+ - Decode: {}
|
|
|
+ - Resize: {target_size: 640, keep_ratio: false}
|
|
|
+ - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
|
|
|
+ - Permute: {}
|
|
|
+ batch_size: 1
|
|
|
+ shuffle: false
|
|
|
+ drop_last: false
|
|
|
+
|
|
|
+
|
|
|
+# Model
|
|
|
+architecture: CO_DETR
|
|
|
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/swin_large_patch4_window12_384_22kto1k_pretrained.pdparams
|
|
|
+num_dec_layer: &num_dec_layer 6
|
|
|
+
|
|
|
+CO_DETR:
|
|
|
+ backbone: SwinTransformer
|
|
|
+ backbone_lr_mult: 0.1
|
|
|
+ neck: ChannelMapper
|
|
|
+ query_head: CoDINOHead
|
|
|
+ rpn_head: RPNHead
|
|
|
+ roi_head: Co_RoiHead
|
|
|
+ bbox_head:
|
|
|
+ name: CoATSSHead
|
|
|
+ in_channels: 256
|
|
|
+ stacked_convs: 1
|
|
|
+ feat_channels: 256
|
|
|
+ bbox_weight: [10., 10., 5., 5.]
|
|
|
+ anchor_generator:
|
|
|
+ name: CoAnchorGenerator
|
|
|
+ octave_base_scale: 8
|
|
|
+ scales_per_octave: 1
|
|
|
+ aspect_ratios: [1.0]
|
|
|
+ strides: [4., 8., 16., 32., 64., 128.]
|
|
|
+ assigner:
|
|
|
+ name: ATSSAssigner
|
|
|
+ topk: 9
|
|
|
+ sm_use: True
|
|
|
+ loss_cls:
|
|
|
+ name: Weighted_FocalLoss
|
|
|
+ use_sigmoid: true
|
|
|
+ gamma: 2.0
|
|
|
+ alpha: 0.25
|
|
|
+ loss_weight: 12.0
|
|
|
+ loss_bbox:
|
|
|
+ name: GIoULoss
|
|
|
+ loss_weight: 24.0
|
|
|
+ reduction: sum
|
|
|
+ loss_cent_weight: 12.0
|
|
|
+
|
|
|
+SwinTransformer:
|
|
|
+ arch: 'swin_L_384' # ['swin_T_224', 'swin_S_224', 'swin_B_224', 'swin_L_224', 'swin_B_384', 'swin_L_384']
|
|
|
+ out_indices: [0, 1, 2, 3]
|
|
|
+ ape: false
|
|
|
+ drop_path_rate: 0.3
|
|
|
+ patch_norm: true
|
|
|
+
|
|
|
+ChannelMapper:
|
|
|
+ in_channels: [192, 384, 768, 1536]
|
|
|
+ kernel_size: 1
|
|
|
+ out_channels: 256
|
|
|
+ norm_type: "gn"
|
|
|
+ norm_groups: 32
|
|
|
+ act: None
|
|
|
+ num_outs: 5
|
|
|
+ strides: [4., 8., 16., 32., 64.]
|
|
|
+
|
|
|
+CoDINOHead:
|
|
|
+ num_query: 900
|
|
|
+ num_dn_query: 100
|
|
|
+ label_noise_ratio: 0.5
|
|
|
+ box_noise_scale: 1.0
|
|
|
+ in_channels: 2048
|
|
|
+ sync_cls_avg_factor: True
|
|
|
+ with_box_refine: True
|
|
|
+ as_two_stage: True
|
|
|
+ mixed_selection: True
|
|
|
+ transformer:
|
|
|
+ name: CoDINOTransformer
|
|
|
+ two_stage_num_proposals: 900
|
|
|
+ with_pos_coord: True
|
|
|
+ with_coord_feat: False
|
|
|
+ num_co_heads: 2
|
|
|
+ num_feature_levels: 5
|
|
|
+ as_two_stage: True
|
|
|
+ mixed_selection: True
|
|
|
+ embed_dims: &embed_dims 256
|
|
|
+ encoder:
|
|
|
+ name: DeformableTransformerEncoder
|
|
|
+ num_layers: *num_dec_layer
|
|
|
+ with_rp: 6
|
|
|
+ encoder_layer:
|
|
|
+ name: DeformableTransformerEncoderLayer
|
|
|
+ d_model: *embed_dims
|
|
|
+ n_head: 8
|
|
|
+ dim_feedforward: 2048
|
|
|
+ n_levels: 5
|
|
|
+ n_points: 4
|
|
|
+ dropout: 0.0
|
|
|
+ decoder:
|
|
|
+ name: DINOTransformerDecoder
|
|
|
+ hidden_dim: *embed_dims
|
|
|
+ num_layers: *num_dec_layer
|
|
|
+ decoder_layer:
|
|
|
+ name: DINOTransformerDecoderLayer
|
|
|
+ d_model: *embed_dims
|
|
|
+ n_head: 8
|
|
|
+ dim_feedforward: 2048
|
|
|
+ n_points: 4
|
|
|
+ n_levels: 5
|
|
|
+ dropout: 0.0
|
|
|
+ positional_encoding:
|
|
|
+ name: PositionEmbedding
|
|
|
+ num_pos_feats: 128
|
|
|
+ temperature: 20
|
|
|
+ normalize: true
|
|
|
+ loss_cls:
|
|
|
+ name: QualityFocalLoss
|
|
|
+ use_sigmoid: true
|
|
|
+ beta: 2.0
|
|
|
+ loss_weight: 1.0
|
|
|
+ loss_bbox:
|
|
|
+ name: L1Loss
|
|
|
+ loss_weight: 5.0
|
|
|
+ loss_iou:
|
|
|
+ name: GIoULoss
|
|
|
+ loss_weight: 2.0
|
|
|
+ reduction: sum
|
|
|
+ assigner:
|
|
|
+ name: HungarianAssigner
|
|
|
+ cls_cost:
|
|
|
+ name: FocalLossCost
|
|
|
+ weight: 2.0
|
|
|
+ reg_cost:
|
|
|
+ name: BBoxL1Cost
|
|
|
+ weight: 5.0
|
|
|
+ box_format: xywh
|
|
|
+ iou_cost:
|
|
|
+ name: IoUCost
|
|
|
+ iou_mode: giou
|
|
|
+ weight: 2.0
|
|
|
+ test_cfg:
|
|
|
+ max_per_img: 300
|
|
|
+ score_thr: 0.0
|
|
|
+ nms:
|
|
|
+ name: MultiClassNMS
|
|
|
+ keep_top_k: -1
|
|
|
+ score_threshold: 0.0
|
|
|
+ nms_threshold: 0.8
|
|
|
+
|
|
|
+RPNHead:
|
|
|
+ loss_rpn_bbox:
|
|
|
+ name: L1Loss
|
|
|
+ reduction: sum
|
|
|
+ loss_weight: 12.0
|
|
|
+ in_channel: 256
|
|
|
+ anchor_generator:
|
|
|
+ name: RetinaAnchorGenerator
|
|
|
+ octave_base_scale: 4
|
|
|
+ scales_per_octave: 3
|
|
|
+ aspect_ratios: [0.5, 1.0, 2.0]
|
|
|
+ strides: [4., 8., 16., 32., 64., 128.]
|
|
|
+ rpn_target_assign:
|
|
|
+ batch_size_per_im: 256
|
|
|
+ fg_fraction: 0.5
|
|
|
+ negative_overlap: 0.3
|
|
|
+ positive_overlap: 0.7
|
|
|
+ use_random: True
|
|
|
+ train_proposal:
|
|
|
+ min_size: 0.0
|
|
|
+ nms_thresh: 0.7
|
|
|
+ pre_nms_top_n: 4000
|
|
|
+ post_nms_top_n: 1000
|
|
|
+ topk_after_collect: True
|
|
|
+ test_proposal:
|
|
|
+ min_size: 0.0
|
|
|
+ nms_thresh: 0.7
|
|
|
+ pre_nms_top_n: 1000
|
|
|
+ post_nms_top_n: 1000
|
|
|
+
|
|
|
+Co_RoiHead:
|
|
|
+ in_channel: 256
|
|
|
+ loss_normalize_pos: True
|
|
|
+ head: TwoFCHead
|
|
|
+ roi_extractor:
|
|
|
+ end_level: 4
|
|
|
+ resolution: 7
|
|
|
+ sampling_ratio: 0
|
|
|
+ aligned: True
|
|
|
+ bbox_assigner:
|
|
|
+ name: BBoxAssigner
|
|
|
+ batch_size_per_im: 512
|
|
|
+ bg_thresh: 0.5
|
|
|
+ fg_thresh: 0.5
|
|
|
+ fg_fraction: 0.25
|
|
|
+ use_random: True
|
|
|
+ bbox_loss:
|
|
|
+ name: GIoULoss
|
|
|
+ loss_weight: 120.0
|
|
|
+ cls_loss_weight: 12.0
|
|
|
+
|
|
|
+
|
|
|
+# Optimizer
|
|
|
+epoch: 12
|
|
|
+
|
|
|
+LearningRate:
|
|
|
+ base_lr: 0.0001
|
|
|
+ schedulers:
|
|
|
+ - !PiecewiseDecay
|
|
|
+ gamma: 0.1
|
|
|
+ milestones: [11]
|
|
|
+ use_warmup: false
|
|
|
+
|
|
|
+OptimizerBuilder:
|
|
|
+ clip_grad_by_norm: 0.1
|
|
|
+ regularizer: false
|
|
|
+ optimizer:
|
|
|
+ type: AdamW
|
|
|
+ weight_decay: 0.0001
|
|
|
+
|
|
|
+
|
|
|
+# Exporting the model
|
|
|
+export:
|
|
|
+ post_process: True # Whether post-processing is included in the network when export model.
|
|
|
+ nms: True # Whether NMS is included in the network when export model.
|
|
|
+ benchmark: False # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
|
|
|
+ fuse_conv_bn: False
|