# FasterRCNN-Swin-Tiny-FPN.yaml
# Runtime
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
log_iter: 20
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
use_ema: true

# Dataset
metric: COCO
num_classes: 80
TrainDataset:
  name: COCODataSet
  image_dir: train2017
  anno_path: annotations/instances_train2017.json
  dataset_dir: dataset/coco
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  name: COCODataSet
  image_dir: val2017
  anno_path: annotations/instances_val2017.json
  dataset_dir: dataset/coco
  allow_empty: true

TestDataset:
  name: ImageFolder
  anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
  dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
# Reader
worker_num: 2
TrainReader:
  sample_transforms:
  - Decode: {}
  - RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
  - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
  - RandomFlip: {prob: 0.5}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 2
  shuffle: true
  drop_last: true
  collate_batch: false

EvalReader:
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_transforms:
  - PadBatch: {pad_to_stride: 32}
  batch_size: 1
  shuffle: false
  drop_last: false

TestReader:
  inputs_def:
    image_shape: [-1, 3, 640, 640]
  sample_transforms:
  - Decode: {}
  - Resize: {interp: 2, target_size: 640, keep_ratio: True}
  - Pad: {size: 640}
  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
  - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false
# Model
architecture: FasterRCNN
# pretrain_weights: # rewrite in SwinTransformer.pretrained in ppdet/modeling/backbones/swin_transformer.py

FasterRCNN:
  backbone: SwinTransformer
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  bbox_post_process: BBoxPostProcess

SwinTransformer:
  arch: 'swin_T_224'
  ape: false
  drop_path_rate: 0.1
  patch_norm: true
  out_indices: [0, 1, 2, 3]
  pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224_22kto1k_pretrained.pdparams
FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: True
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: True
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000
BBoxHead:
  head: TwoFCHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: True
  bbox_assigner: BBoxAssigner

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: True

TwoFCHead:
  out_channel: 1024

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5
# Optimizer
epoch: 12
LearningRate:
  base_lr: 0.0001
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [8, 11]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000

OptimizerBuilder:
  clip_grad_by_norm: 1.0
  optimizer:
    type: AdamW
    weight_decay: 0.05
    param_groups:
    - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
      weight_decay: 0.0
# Exporting the model
export:
  post_process: True  # Whether post-processing is included in the network when export model.
  nms: True  # Whether NMS is included in the network when export model.
  benchmark: False  # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
  fuse_conv_bn: False