# Co-DINO-R50.yaml
  # Runtime
  # Global run-time switches. These are flat top-level keys with no nesting.
  find_unused_parameters: True

  # Device selection: only the GPU flag is enabled in this config.
  use_gpu: true
  use_xpu: false
  use_mlu: false
  use_npu: false

  log_iter: 20
  save_dir: output
  snapshot_epoch: 1
  print_flops: false
  print_params: false
  use_ema: true
  # Dataset
  # COCO-format train/eval datasets plus an image-folder dataset for inference.
  metric: COCO
  num_classes: 80

  TrainDataset:
    name: COCODataSet
    image_dir: train2017
    anno_path: annotations/instances_train2017.json
    dataset_dir: dataset/coco
    allow_empty: true
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

  EvalDataset:
    name: COCODataSet
    image_dir: val2017
    anno_path: annotations/instances_val2017.json
    dataset_dir: dataset/coco
    allow_empty: true

  TestDataset:
    name: ImageFolder
    anno_path: annotations/instances_val2017.json  # a txt label list (like VOC's label_list.txt) is also supported
    dataset_dir: dataset/coco  # if set, anno_path is resolved as 'dataset_dir/anno_path'
  # Reader
  # Data-loading pipelines for training, evaluation and inference.
  worker_num: 2

  TrainReader:
    sample_transforms:
      - Decode: {}
      - RandomFlip: {prob: 0.5}
      # RandomSelect is given two alternative pipelines: transforms1 is a single
      # multi-scale short-side resize; transforms2 is resize -> random crop ->
      # multi-scale short-side resize.
      - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
                        transforms2: [
                            RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
                            RandomSizeCrop: { min_size: 384, max_size: 600 },
                            RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
        }
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_transforms:
      - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
    batch_size: 2
    shuffle: true
    drop_last: true
    collate_batch: false
    use_shared_memory: false

  # EvalReader and TestReader use the same fixed-size preprocessing.
  EvalReader:
    sample_transforms:
      - Decode: {}
      - Resize: {target_size: [800, 1333], keep_ratio: True}
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_size: 1
    shuffle: false
    drop_last: false

  TestReader:
    sample_transforms:
      - Decode: {}
      - Resize: {target_size: [800, 1333], keep_ratio: True}
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_size: 1
    shuffle: false
    drop_last: false
  # Model
  # NOTE(review): the nesting in this section was reconstructed from a copy in
  # which all indentation had been stripped. Key order and values are unchanged,
  # but parent/child placement of the keys marked NOTE(review) below is inferred
  # and must be confirmed against the upstream config before training.
  architecture: CO_DETR
  pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
  # Anchor reused below for both the encoder and decoder layer counts.
  num_dec_layer: &num_dec_layer 6

  CO_DETR:
    backbone: ResNet
    backbone_lr_mult: 0.1
    neck: ChannelMapper
    query_head: CoDINOHead
    rpn_head: RPNHead
    roi_head: Co_RoiHead
    bbox_head:
      name: CoATSSHead
      in_channels: 256
      stacked_convs: 1
      feat_channels: 256
      bbox_weight: [10., 10., 5., 5.]
      anchor_generator:
        name: CoAnchorGenerator
        octave_base_scale: 8
        scales_per_octave: 1
        aspect_ratios: [1.0]
        strides: [4., 8., 16., 32., 64., 128.]
      assigner:
        name: ATSSAssigner
        topk: 9
        sm_use: True
      loss_cls:
        name: Weighted_FocalLoss
        use_sigmoid: true
        gamma: 2.0
        alpha: 0.25
        loss_weight: 12.0
      loss_bbox:
        name: GIoULoss
        loss_weight: 24.0
        reduction: sum
      # NOTE(review): placed on the head rather than inside loss_bbox - confirm.
      loss_cent_weight: 12.0

  ResNet:
    # index 0 stands for res2
    depth: 50
    norm_type: bn
    freeze_at: 0
    return_idx: [0, 1, 2, 3]
    num_stages: 4

  ChannelMapper:
    in_channels: [256, 512, 1024, 2048]
    kernel_size: 1
    out_channels: 256
    norm_type: "gn"
    norm_groups: 32
    act: None
    num_outs: 5
    strides: [4., 8., 16., 32., 64.]

  CoDINOHead:
    num_query: 900
    num_dn_query: 100
    label_noise_ratio: 0.5
    box_noise_scale: 1.0
    in_channels: 2048
    sync_cls_avg_factor: True
    with_box_refine: True
    as_two_stage: True
    mixed_selection: True
    transformer:
      name: CoDINOTransformer
      two_stage_num_proposals: 900
      with_pos_coord: True
      with_coord_feat: False
      num_co_heads: 2
      num_feature_levels: 5
      as_two_stage: True
      mixed_selection: True
      # Anchor reused by the encoder/decoder layer widths below.
      embed_dims: &embed_dims 256
      encoder:
        name: DeformableTransformerEncoder
        num_layers: *num_dec_layer
        with_rp: 6
        encoder_layer:
          name: DeformableTransformerEncoderLayer
          d_model: *embed_dims
          n_head: 8
          dim_feedforward: 2048
          n_levels: 5
          n_points: 4
          dropout: 0.0
      decoder:
        name: DINOTransformerDecoder
        hidden_dim: *embed_dims
        num_layers: *num_dec_layer
        decoder_layer:
          name: DINOTransformerDecoderLayer
          d_model: *embed_dims
          n_head: 8
          dim_feedforward: 2048
          n_points: 4
          n_levels: 5
          dropout: 0.0
    # NOTE(review): placed as a sibling of `transformer` (head-level key);
    # it may instead belong inside `transformer` - confirm.
    positional_encoding:
      name: PositionEmbedding
      num_pos_feats: 128
      temperature: 20
      normalize: true
    loss_cls:
      name: QualityFocalLoss
      use_sigmoid: true
      beta: 2.0
      loss_weight: 1.0
    loss_bbox:
      name: L1Loss
      loss_weight: 5.0
    loss_iou:
      name: GIoULoss
      loss_weight: 2.0
      reduction: sum
    assigner:
      name: HungarianAssigner
      cls_cost:
        name: FocalLossCost
        weight: 2.0
      reg_cost:
        name: BBoxL1Cost
        weight: 5.0
        box_format: xywh
      iou_cost:
        name: IoUCost
        iou_mode: giou
        weight: 2.0
    test_cfg:
      max_per_img: 300
      score_thr: 0.0
    # NOTE(review): placed as a sibling of `test_cfg`; it may instead be nested
    # inside `test_cfg` - confirm.
    nms:
      name: MultiClassNMS
      keep_top_k: -1
      score_threshold: 0.0
      nms_threshold: 0.8

  RPNHead:
    loss_rpn_bbox:
      name: L1Loss
      reduction: sum
      loss_weight: 12.0
    # NOTE(review): placed on RPNHead rather than inside loss_rpn_bbox - confirm.
    in_channel: 256
    anchor_generator:
      name: RetinaAnchorGenerator
      octave_base_scale: 4
      scales_per_octave: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      strides: [4., 8., 16., 32., 64., 128.]
    rpn_target_assign:
      batch_size_per_im: 256
      fg_fraction: 0.5
      negative_overlap: 0.3
      positive_overlap: 0.7
      use_random: True
    train_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 4000
      post_nms_top_n: 1000
      topk_after_collect: True
    test_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 1000
      post_nms_top_n: 1000

  Co_RoiHead:
    in_channel: 256
    loss_normalize_pos: True
    head: TwoFCHead
    roi_extractor:
      end_level: 4
      resolution: 7
      sampling_ratio: 0
      aligned: True
    bbox_assigner:
      name: BBoxAssigner
      batch_size_per_im: 512
      bg_thresh: 0.5
      fg_thresh: 0.5
      fg_fraction: 0.25
      use_random: True
    bbox_loss:
      name: GIoULoss
      loss_weight: 120.0
    # NOTE(review): placed on Co_RoiHead rather than inside bbox_loss - confirm.
    cls_loss_weight: 12.0
  # Optimizer
  # 12-epoch schedule: a single piecewise decay step (x0.1) at milestone 11,
  # with warmup disabled.
  epoch: 12

  LearningRate:
    base_lr: 0.0002
    schedulers:
      - !PiecewiseDecay
        gamma: 0.1
        milestones: [11]
        use_warmup: false

  OptimizerBuilder:
    clip_grad_by_norm: 0.1
    regularizer: false
    optimizer:
      type: AdamW
      weight_decay: 0.0001
  # Exporting the model
  export:
    post_process: True  # Whether post-processing is included in the network when exporting the model.
    nms: True           # Whether NMS is included in the network when exporting the model.
    benchmark: False    # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
    fuse_conv_bn: False