# Runtime
epoch: 40
log_iter: 10
find_unused_parameters: true
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
use_ema: true
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: true
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
eval_size: [640, 640]

# Dataset
metric: COCO
num_classes: 1

worker_num: 4

TrainDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_train.json
  dataset_dir: datasets/COCO
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO
  allow_empty: true

TestDataset:
  name: ImageFolder
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {prob: 0.8}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 4
  shuffle: false
  drop_last: false

TestReader:
  inputs_def:
    image_shape: [3, 640, 640]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

# Model
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams

norm_type: sync_bn
hidden_dim: 256
use_focal_loss: True

DETR:
  backbone: PPHGNetV2
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

PPHGNetV2:
  arch: 'L'
  return_idx: [1, 2, 3]
  freeze_stem_only: true
  freeze_at: 0
  freeze_norm: true
  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  expansion: 1.0

RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: false

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    use_vfl: true
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  num_top_queries: 300

# Optimizer
LearningRate:
  base_lr: 0.0001
  schedulers:
  - !PiecewiseDecay
    gamma: 1.0
    milestones: [100]
    use_warmup: true
  - !LinearWarmup
    start_factor: 0.001
    steps: 100

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

# Export
export:
  post_process: true
  nms: true
  benchmark: false
  fuse_conv_bn: false