# Training configuration for a DETR-architecture detector:
# PPHGNetV2 backbone, HybridEncoder neck, RTDETRTransformer, DINOHead,
# trained on a single-class COCO-format dataset.
#
# NOTE(review): the nesting in this file was rebuilt from a flattened copy
# (every line had a "- " prefix and no indentation). Confirm the parent of
# each key against the consuming framework's schema before relying on it.

# Runtime
epoch: 40
log_iter: 10
find_unused_parameters: true
# Device selection: only the GPU flag is enabled.
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
use_ema: true
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: true
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
# Same height/width as the Resize target used by EvalReader and TestReader.
eval_size: [640, 640]

# Dataset
metric: COCO
num_classes: 1
worker_num: 4

TrainDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_train.json
  dataset_dir: datasets/COCO
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO
  allow_empty: true

TestDataset:
  name: ImageFolder
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {prob: 0.8}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    - BatchRandomResize:
        # 640 is listed three times; presumably this weights the random
        # choice towards the evaluation size - confirm before deduplicating.
        target_size: [480, 512, 544, 576, 608, 640, 640, 640,
                      672, 704, 736, 768, 800]
        random_size: true
        random_interp: true
        keep_ratio: false
    # norm_type is the string 'none' (quoted so it is never read as null).
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: 'none'}
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    # norm_type is the string 'none' (quoted so it is never read as null).
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: 'none'}
    - Permute: {}
  batch_size: 4
  shuffle: false
  drop_last: false

TestReader:
  inputs_def:
    image_shape: [3, 640, 640]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    # norm_type is the string 'none' (quoted so it is never read as null).
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: 'none'}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

# Model
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
norm_type: sync_bn
hidden_dim: 256
use_focal_loss: true

# Component names below refer to the sections that follow.
DETR:
  backbone: PPHGNetV2
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

PPHGNetV2:
  arch: 'L'
  return_idx: [1, 2, 3]
  freeze_stem_only: true
  freeze_at: 0
  freeze_norm: true
  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  # NOTE(review): placed on HybridEncoder rather than encoder_layer - confirm.
  expansion: 1.0

RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: false

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    use_vfl: true
    # NOTE(review): matcher is nested under loss here - confirm.
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300

# Optimizer
LearningRate:
  base_lr: 0.0001
  # The !PiecewiseDecay / !LinearWarmup tags are application-defined; the file
  # must be read by a loader that registers them (a plain safe loader will
  # reject unknown tags).
  schedulers:
    # NOTE(review): gamma is 1.0 and the only milestone (100) is larger than
    # `epoch: 40`, so this entry presumably leaves the rate unchanged - confirm
    # the milestone unit.
    - !PiecewiseDecay
      gamma: 1.0
      milestones: [100]
      use_warmup: true
    - !LinearWarmup
      start_factor: 0.001
      steps: 100

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

# Export
export:
  post_process: true
  nms: true
  benchmark: false
  fuse_conv_bn: false