Cascade-MaskRCNN-ResNet50-FPN.yaml 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. epoch: 12
  2. use_gpu: true
  3. use_xpu: false
  4. use_mlu: false
  5. use_npu: false
  6. log_iter: 20
  7. save_dir: output
  8. target_metrics: mask
  9. snapshot_epoch: 1
  10. print_flops: false
  11. print_params: false
  12. # Dataset
  13. metric: COCO
  14. num_classes: 80
  15. worker_num: 2
  16. TrainDataset:
  17. name: COCODataSet
  18. image_dir: train2017
  19. anno_path: annotations/instances_train2017.json
  20. dataset_dir: dataset/coco
  21. data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
  22. EvalDataset:
  23. name: COCODataSet
  24. image_dir: val2017
  25. anno_path: annotations/instances_val2017.json
  26. dataset_dir: dataset/coco
  27. TestDataset:
  28. name: ImageFolder
  29. anno_path: annotations/instances_val2017.json # also supports txt (like VOC's label_list.txt)
  30. dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
  31. TrainReader:
  32. sample_transforms:
  33. - Decode: {}
  34. - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
  35. - RandomFlip: {prob: 0.5}
  36. - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  37. - Permute: {}
  38. batch_transforms:
  39. - PadBatch: {pad_to_stride: 32}
  40. batch_size: 1
  41. shuffle: true
  42. drop_last: true
  43. collate_batch: false
  44. EvalReader:
  45. sample_transforms:
  46. - Decode: {}
  47. - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  48. - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  49. - Permute: {}
  50. batch_transforms:
  51. - PadBatch: {pad_to_stride: 32}
  52. batch_size: 1
  53. shuffle: false
  54. drop_last: false
  55. TestReader:
  56. sample_transforms:
  57. - Decode: {}
  58. - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  59. - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  60. - Permute: {}
  61. batch_transforms:
  62. - PadBatch: {pad_to_stride: 32}
  63. batch_size: 1
  64. shuffle: false
  65. drop_last: false
  66. LearningRate:
  67. base_lr: 0.01
  68. schedulers:
  69. - !PiecewiseDecay
  70. gamma: 0.1
  71. milestones: [8, 11]
  72. - !LinearWarmup
  73. start_factor: 0.001
  74. steps: 1000
  75. OptimizerBuilder:
  76. clip_grad_by_norm: 35.0 # avoid gradient explosion on NPU
  77. optimizer:
  78. momentum: 0.9
  79. type: Momentum
  80. regularizer:
  81. factor: 0.0001
  82. type: L2
  83. # Model
  84. architecture: CascadeRCNN
  85. pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
  86. CascadeRCNN:
  87. backbone: ResNet
  88. neck: FPN
  89. rpn_head: RPNHead
  90. bbox_head: CascadeHead
  91. mask_head: MaskHead
  92. # post process
  93. bbox_post_process: BBoxPostProcess
  94. mask_post_process: MaskPostProcess
  95. ResNet:
  96. # index 0 stands for res2
  97. depth: 50
  98. norm_type: bn
  99. freeze_at: 0
  100. return_idx: [0,1,2,3]
  101. num_stages: 4
  102. FPN:
  103. out_channel: 256
  104. RPNHead:
  105. anchor_generator:
  106. aspect_ratios: [0.5, 1.0, 2.0]
  107. anchor_sizes: [[32], [64], [128], [256], [512]]
  108. strides: [4, 8, 16, 32, 64]
  109. rpn_target_assign:
  110. batch_size_per_im: 256
  111. fg_fraction: 0.5
  112. negative_overlap: 0.3
  113. positive_overlap: 0.7
  114. use_random: True
  115. train_proposal:
  116. min_size: 0.0
  117. nms_thresh: 0.7
  118. pre_nms_top_n: 2000
  119. post_nms_top_n: 2000
  120. topk_after_collect: True
  121. test_proposal:
  122. min_size: 0.0
  123. nms_thresh: 0.7
  124. pre_nms_top_n: 1000
  125. post_nms_top_n: 1000
  126. CascadeHead:
  127. head: CascadeTwoFCHead
  128. roi_extractor:
  129. resolution: 7
  130. sampling_ratio: 0
  131. aligned: True
  132. bbox_assigner: BBoxAssigner
  133. BBoxAssigner:
  134. batch_size_per_im: 512
  135. bg_thresh: 0.5
  136. fg_thresh: 0.5
  137. fg_fraction: 0.25
  138. cascade_iou: [0.5, 0.6, 0.7]
  139. use_random: True
  140. CascadeTwoFCHead:
  141. out_channel: 1024
  142. BBoxPostProcess:
  143. decode:
  144. name: RCNNBox
  145. prior_box_var: [30.0, 30.0, 15.0, 15.0]
  146. nms:
  147. name: MultiClassNMS
  148. keep_top_k: 100
  149. score_threshold: 0.05
  150. nms_threshold: 0.5
  151. MaskHead:
  152. head: MaskFeat
  153. roi_extractor:
  154. resolution: 14
  155. sampling_ratio: 0
  156. aligned: True
  157. mask_assigner: MaskAssigner
  158. share_bbox_feat: False
  159. MaskFeat:
  160. num_convs: 4
  161. out_channel: 256
  162. MaskAssigner:
  163. mask_resolution: 28
  164. MaskPostProcess:
  165. binary_thresh: 0.5
  166. # Exporting the model
  167. export:
  168. post_process: True # Whether post-processing is included in the network when exporting the model.
  169. nms: True # Whether NMS is included in the network when exporting the model.
  170. benchmark: False # Used for testing model performance; if set to `True`, post-processing and NMS will not be exported.
  171. fuse_conv_bn: False