# Training / evaluation / export configuration for a single-class detector:
# DETR architecture with a PPHGNetV2 ('L') backbone, HybridEncoder neck,
# RTDETRTransformer decoder and DINOHead.
#
# NOTE(review): the block nesting below was reconstructed from a flattened
# copy of this file; confirm the placement of nested keys against the
# consuming framework before relying on it.

# Runtime
epoch: 40
log_iter: 10
find_unused_parameters: true
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
use_ema: true
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: true
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
# Same [640, 640] size that the Eval/Test readers resize to below.
eval_size: [640, 640]

# Dataset
metric: COCO
num_classes: 1
worker_num: 4

TrainDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_train.json
  dataset_dir: datasets/COCO
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO
  allow_empty: true

TestDataset:
  name: ImageFolder
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {prob: 0.8}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    # Block style only because the flow form exceeds the line-length limit.
    # 640 is listed three times in target_size on purpose-or-not; the
    # duplicates are preserved as found.
    - BatchRandomResize:
        target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
        random_size: true
        random_interp: true
        keep_ratio: false
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 4
  shuffle: false
  drop_last: false

TestReader:
  inputs_def:
    image_shape: [3, 640, 640]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

# Model
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
norm_type: sync_bn
hidden_dim: 256
use_focal_loss: true

# Names the component used for each role; each component is configured in
# its own top-level section below.
DETR:
  backbone: PPHGNetV2
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

PPHGNetV2:
  arch: 'L'
  return_idx: [1, 2, 3]
  freeze_stem_only: true
  freeze_at: 0
  freeze_norm: true
  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  # NOTE(review): placed on HybridEncoder rather than inside encoder_layer;
  # confirm this is the level the consumer expects.
  expansion: 1.0

RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: false

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    use_vfl: true
    # NOTE(review): matcher is nested under loss; confirm against the
    # loss class's expected arguments.
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300

# Optimizer
LearningRate:
  base_lr: 0.0001
  schedulers:
    # NOTE(review): the only milestone (100) lies beyond `epoch: 40` and
    # gamma is 1.0, so this looks like an effectively constant rate after
    # warmup - confirm that is intended.
    - !PiecewiseDecay
      gamma: 1.0
      milestones: [100]
      use_warmup: true
    - !LinearWarmup
      start_factor: 0.001
      steps: 100

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

# Export
export:
  post_process: true
  nms: true
  benchmark: false
  fuse_conv_bn: false