# Mask-RT-DETR-L.yaml (3.8 KB)
  # Runtime
  # Run-level settings: epoch count, device flags, logging/snapshot cadence,
  # and EMA options. Of the four device flags only use_gpu is enabled.
  epoch: 72
  use_gpu: true
  use_xpu: false
  use_mlu: false
  use_npu: false
  log_iter: 20
  save_dir: output
  snapshot_epoch: 1
  target_metrics: mask
  print_flops: false
  print_params: false
  # EMA settings (enabled here).
  use_ema: True
  ema_decay: 0.9999
  ema_decay_type: "exponential"
  ema_filter_no_grad: True
  # Dataset
  # Metric, class count and loader worker count, followed by the train / eval /
  # test dataset definitions. All three datasets share dataset_dir datasets/COCO.
  metric: COCO
  num_classes: 80
  worker_num: 4

  TrainDataset:
    name: COCOInstSegDataset
    image_dir: images
    anno_path: annotations/instance_train.json
    dataset_dir: datasets/COCO
    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']

  EvalDataset:
    name: COCOInstSegDataset
    image_dir: images
    anno_path: annotations/instance_val.json
    dataset_dir: datasets/COCO

  # Uses the same annotation file as EvalDataset and sets no image_dir.
  TestDataset:
    name: ImageFolder
    anno_path: annotations/instance_val.json
    dataset_dir: datasets/COCO
  # Training reader: per-sample transforms, then per-batch transforms, then
  # loader options.
  TrainReader:
    sample_transforms:
      - Decode: {}
      - Poly2Mask: {del_poly: True}
      - RandomDistort: {prob: 0.8}
      - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
      - RandomCrop: {prob: 0.8}
      - RandomFlip: {}
    batch_transforms:
      # 640 is listed three times in target_size.
      # NOTE(review): presumably this weights random sampling toward 640 — confirm.
      - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
      - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
      - NormalizeBox: {}
      - BboxXYXY2XYWH: {}
      - Permute: {}
    batch_size: 4
    shuffle: true
    drop_last: true
    collate_batch: false
    use_shared_memory: true
  # Evaluation reader: fixed 640x640 resize (keep_ratio off), same
  # NormalizeImage arguments as the training reader, no shuffling.
  EvalReader:
    sample_transforms:
      - Decode: {}
      - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
      - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
      - Permute: {}
    batch_size: 1  # must be 1
    shuffle: false
    drop_last: false
  # Test/inference reader: declares a [3, 640, 640] input shape and applies the
  # same sample transforms as EvalReader.
  TestReader:
    inputs_def:
      image_shape: [3, 640, 640]
    sample_transforms:
      - Decode: {}
      - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
      - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
      - Permute: {}
    batch_size: 1
    shuffle: false
    drop_last: false
  # Learning-rate schedule: a base rate plus a list of tagged scheduler entries.
  LearningRate:
    base_lr: 0.0001
    schedulers:
      # NOTE(review): gamma is 1.0 and the only milestone (100) is past
      # epoch: 72, so this decay entry looks like a no-op — confirm it is intended.
      - !PiecewiseDecay
        gamma: 1.0
        milestones: [100]
        use_warmup: true
      - !LinearWarmup
        start_factor: 0.001
        steps: 2000
  # Optimizer: AdamW with weight decay 0.0001, gradient clipping by norm at 0.1,
  # and no separate regularizer.
  OptimizerBuilder:
    clip_grad_by_norm: 0.1
    regularizer: false
    optimizer:
      type: AdamW
      weight_decay: 0.0001
  # Model
  # Global model settings. eval_size matches the 640x640 Resize used by the
  # eval/test readers; hidden_dim matches MaskHybridEncoder.hidden_dim (256).
  architecture: DETR
  with_mask: True
  pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
  norm_type: sync_bn
  hidden_dim: 256
  use_focal_loss: True
  eval_size: [640, 640]
  num_prototypes: 32
  find_unused_parameters: True
  # Architecture wiring: each value names a component that has its own
  # top-level section in this file.
  DETR:
    backbone: PPHGNetV2
    neck: MaskHybridEncoder
    transformer: MaskRTDETR
    detr_head: MaskDINOHead
    post_process: DETRPostProcess
  # Backbone settings.
  PPHGNetV2:
    arch: 'L'
    return_idx: [0, 1, 2, 3]
    freeze_stem_only: True
    freeze_at: 0
    freeze_norm: True
    # Five multipliers; the first is 0 and the rest are 0.05.
    # NOTE(review): presumably one per stem/stage — confirm.
    lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
  # Neck settings.
  # NOTE(review): nesting was reconstructed from a flattened listing. name
  # through activation are placed under encoder_layer; expansion and
  # mask_feat_channels are placed on MaskHybridEncoder itself — confirm against
  # the upstream config.
  MaskHybridEncoder:
    hidden_dim: 256
    use_encoder_idx: [3]
    num_encoder_layers: 1
    encoder_layer:
      name: TransformerLayer
      d_model: 256
      nhead: 8
      dim_feedforward: 1024
      dropout: 0.
      activation: 'gelu'
    expansion: 1.0
    mask_feat_channels: [64, 64]
  # Transformer settings. feat_strides lists three strides, matching
  # num_levels: 3. activation is relu here, whereas the MaskHybridEncoder
  # encoder layer uses 'gelu'.
  MaskRTDETR:
    num_queries: 300
    position_embed_type: sine
    feat_strides: [8, 16, 32]
    num_levels: 3
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.0
    activation: relu
    # Denoising options.
    num_denoising: 100
    label_noise_ratio: 0.5
    box_noise_scale: 1.0
    learnt_init_query: False
    mask_enhanced: True
  # Head settings: the loss and its matcher. loss_coeff and matcher_coeff carry
  # identical weights.
  # NOTE(review): nesting was reconstructed from a flattened listing; matcher
  # is placed under loss — confirm against the upstream config.
  MaskDINOHead:
    loss:
      name: MaskDINOLoss
      loss_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
      aux_loss: True
      use_vfl: True
      vfl_iou_type: 'mask'
      matcher:
        name: HungarianMatcher
        matcher_coeff: {class: 4, bbox: 5, giou: 2, mask: 5, dice: 5}
  # Post-processing settings.
  DETRPostProcess:
    num_top_queries: 100
    mask_stride: 4
  # Exporting the model
  export:
    post_process: True
    nms: True
    benchmark: False
    fuse_conv_bn: False