# MaskRCNN-ResNet50-vd-FPN.yaml (3.9 KB)
  # Global run settings: schedule length, device flags, logging and checkpointing.
  epoch: 12
  # Device selection flags; only the GPU flag is enabled here.
  use_gpu: true
  use_xpu: false
  use_mlu: false
  use_npu: false
  log_iter: 20
  save_dir: output
  target_metrics: mask
  snapshot_epoch: 1
  print_flops: false
  print_params: false
  # dataset
  metric: COCO
  num_classes: 80
  worker_num: 2

  # NOTE(review): nesting below was reconstructed from a flattened listing;
  # confirm against the upstream config.
  TrainDataset:
    name: COCODataSet
    image_dir: train2017
    anno_path: annotations/instances_train2017.json
    dataset_dir: dataset/coco
    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']

  EvalDataset:
    name: COCODataSet
    image_dir: val2017
    anno_path: annotations/instances_val2017.json
    dataset_dir: dataset/coco

  TestDataset:
    name: ImageFolder
    anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
    dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
  # Training data pipeline: per-sample transforms, then per-batch transforms,
  # followed by loader options.
  TrainReader:
    sample_transforms:
      - Decode: {}
      - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
      - RandomFlip: {prob: 0.5}
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_transforms:
      - PadBatch: {pad_to_stride: 32}
    batch_size: 1
    shuffle: true
    drop_last: true
    collate_batch: false
    use_shared_memory: true
  # Evaluation data pipeline: fixed-size resize (no random augmentation),
  # no shuffling, no dropped samples.
  EvalReader:
    sample_transforms:
      - Decode: {}
      - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_transforms:
      - PadBatch: {pad_to_stride: 32}
    batch_size: 1
    shuffle: false
    drop_last: false
  # Inference data pipeline; uses the same transform list and loader options
  # as EvalReader in this file.
  TestReader:
    sample_transforms:
      - Decode: {}
      - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
      - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
      - Permute: {}
    batch_transforms:
      - PadBatch: {pad_to_stride: 32}
    batch_size: 1
    shuffle: false
    drop_last: false
  # Learning-rate schedule: a base rate plus a list of tagged scheduler objects.
  # NOTE(review): gamma/milestones are attached to !PiecewiseDecay and
  # start_factor/steps to !LinearWarmup based on key order — confirm upstream.
  LearningRate:
    base_lr: 0.01
    schedulers:
      - !PiecewiseDecay
        gamma: 0.1
        milestones: [8, 11]
      - !LinearWarmup
        start_factor: 0.001
        steps: 1000
  # Optimizer and weight regularizer selection.
  OptimizerBuilder:
    optimizer:
      momentum: 0.9
      type: Momentum
    regularizer:
      factor: 0.0001
      type: L2
  # model
  architecture: MaskRCNN
  pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams

  # Component wiring: each value names another top-level section of this config.
  MaskRCNN:
    backbone: ResNet
    neck: FPN
    rpn_head: RPNHead
    bbox_head: BBoxHead
    mask_head: MaskHead
    # post process
    bbox_post_process: BBoxPostProcess
    mask_post_process: MaskPostProcess
  # Backbone settings.
  ResNet:
    # index 0 stands for res2
    depth: 50
    variant: d
    norm_type: bn
    freeze_at: 0
    return_idx: [0,1,2,3]
    num_stages: 4

  # Neck settings.
  FPN:
    out_channel: 256
  # Region proposal head: anchor layout, target assignment, and separate
  # proposal-generation settings for training and testing.
  # NOTE(review): grouping of keys under the four sub-sections was
  # reconstructed from a flattened listing — confirm upstream.
  RPNHead:
    anchor_generator:
      aspect_ratios: [0.5, 1.0, 2.0]
      anchor_sizes: [[32], [64], [128], [256], [512]]
      strides: [4, 8, 16, 32, 64]
    rpn_target_assign:
      batch_size_per_im: 256
      fg_fraction: 0.5
      negative_overlap: 0.3
      positive_overlap: 0.7
      use_random: True
    train_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 2000
      post_nms_top_n: 1000
      topk_after_collect: True
    test_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 1000
      post_nms_top_n: 1000
  # Box head; `head` and `bbox_assigner` name the two sections that follow.
  BBoxHead:
    head: TwoFCHead
    roi_extractor:
      resolution: 7
      sampling_ratio: 0
      aligned: True
    bbox_assigner: BBoxAssigner

  # Settings for the assigner referenced by BBoxHead.bbox_assigner.
  BBoxAssigner:
    batch_size_per_im: 512
    bg_thresh: 0.5
    fg_thresh: 0.5
    fg_fraction: 0.25
    use_random: True

  # Settings for the head referenced by BBoxHead.head.
  TwoFCHead:
    out_channel: 1024
  # Box post-processing: decoder selection plus NMS settings.
  BBoxPostProcess:
    decode: RCNNBox
    nms:
      name: MultiClassNMS
      keep_top_k: 100
      score_threshold: 0.05
      nms_threshold: 0.5
  # Mask head; `head` and `mask_assigner` name sections defined below.
  MaskHead:
    head: MaskFeat
    roi_extractor:
      resolution: 14
      sampling_ratio: 0
      aligned: True
    mask_assigner: MaskAssigner
    share_bbox_feat: False

  # Settings for the head referenced by MaskHead.head.
  MaskFeat:
    num_convs: 4
    out_channel: 256

  # Settings for the assigner referenced by MaskHead.mask_assigner.
  MaskAssigner:
    mask_resolution: 28

  # Settings for the post-processor referenced by MaskRCNN.mask_post_process.
  MaskPostProcess:
    binary_thresh: 0.5
  # Exporting the model
  export:
    post_process: True
    nms: True
    benchmark: False
    fuse_conv_bn: False