# PP-DocBlockLayout.yaml
#
# Training / evaluation / export configuration for a single-class DETR-style
# detector (architecture: DETR, backbone PPHGNetV2, RTDETRTransformer decoder).
#
# NOTE(review): this file was recovered from a listing in which the viewer's
# line-number gutter was fused into every line and all indentation was lost.
# Keys and values are reproduced verbatim; the nesting below was reconstructed
# from the key names and should be confirmed against the upstream config.

# Runtime
epoch: 40
log_iter: 10
find_unused_parameters: true
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
use_ema: true
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: true
save_dir: output
snapshot_epoch: 1
print_flops: false
print_params: false
# Same 640x640 size as the Resize target used by EvalReader and TestReader.
eval_size: [640, 640]

# Dataset
metric: COCO
num_classes: 1
worker_num: 4

TrainDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_train.json
  dataset_dir: datasets/COCO
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']

EvalDataset:
  name: COCODetDataset
  image_dir: images
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO
  allow_empty: true

TestDataset:
  name: ImageFolder
  anno_path: annotations/instance_val.json
  dataset_dir: datasets/COCO

TrainReader:
  sample_transforms:
    - Decode: {}
    - RandomDistort: {prob: 0.8}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    # 640 is listed three times in target_size; presumably to weight the
    # random size choice toward the evaluation size -- TODO confirm.
    - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_size: 8
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: true

EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 4
  shuffle: false
  drop_last: false

TestReader:
  inputs_def:
    image_shape: [3, 640, 640]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false

# Model
architecture: DETR
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
norm_type: sync_bn
hidden_dim: 256
use_focal_loss: True

# Component wiring: each value names one of the component sections below.
DETR:
  backbone: PPHGNetV2
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess

PPHGNetV2:
  arch: 'L'
  return_idx: [1, 2, 3]
  freeze_stem_only: true
  freeze_at: 0
  freeze_norm: true
  lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  # NOTE(review): `expansion` is placed on HybridEncoder rather than inside
  # encoder_layer; the flattened listing cannot distinguish the two -- confirm.
  expansion: 1.0

RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: false

DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    use_vfl: true
    # NOTE(review): matcher is nested under loss here; confirm it is not a
    # sibling of loss in the upstream file.
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

DETRPostProcess:
  # Same value as RTDETRTransformer.num_queries (300).
  num_top_queries: 300

# Optimizer
LearningRate:
  base_lr: 0.0001
  schedulers:
    # gamma is 1.0 and the only milestone (100) is greater than `epoch: 40`;
    # presumably the rate is meant to stay at base_lr after warmup -- confirm.
    - !PiecewiseDecay
      gamma: 1.0
      milestones: [100]
      use_warmup: true
    - !LinearWarmup
      start_factor: 0.001
      steps: 100

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001

# Export
export:
  post_process: true
  nms: true
  benchmark: false
  fuse_conv_bn: false