# PP-OCRv3_server_det.yaml
---
  # Global: run-wide settings (device, schedule length, logging cadence, and
  # model / checkpoint / output paths).
  Global:
    debug: false
    use_gpu: true
    epoch_num: 500
    log_smooth_window: 20
    print_batch_step: 10
    save_model_dir: ./output/ch_PP-OCR_v3_det/
    save_epoch_step: 100
    # NOTE(review): presumably [start_step, interval] — confirm with the trainer.
    eval_batch_step:
      - 0
      - 400
    cal_metric_during_train: false
    pretrained_model: ch_PP-OCRv3_det_distill_train/best_accuracy_new.pdparams
    # Explicit nulls: no resume checkpoint and no inference export directory set.
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: doc/imgs_en/img_10.jpg
    save_res_path: ./checkpoints/det_db/predicts_db.txt
    distributed: true
    # NOTE(review): -1 entries look like "dynamic" height/width — confirm.
    d2s_train_image_shape: [3, -1, -1]
    amp_dtype: bfloat16
  # Architecture: a `det` model using the DB algorithm, described as three
  # named parts (Backbone, Neck, Head), each with its own arguments.
  Architecture:
    model_type: det
    algorithm: DB
    Backbone:
      name: ResNet_vd
      in_channels: 3
      layers: 50
    Neck:
      name: LKPAN
      out_channels: 256
    Head:
      name: DBHead
      kernel_list: [7, 2, 2]
      k: 50
  # Loss: DBLoss configuration; the main loss term is DiceLoss.
  # NOTE(review): alpha / beta look like per-term weights — confirm in DBLoss.
  Loss:
    name: DBLoss
    balance_loss: true
    main_loss_type: DiceLoss
    alpha: 5
    beta: 10
    ohem_ratio: 3
  # Optimizer: Adam, with a nested learning-rate entry (`lr`) and a nested
  # weight regularizer entry (`regularizer`).
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    lr:
      name: Cosine
      learning_rate: 0.001
      warmup_epoch: 2
    regularizer:
      name: L2
      # Written with a dot and a signed exponent so YAML 1.1 loaders read a float.
      factor: 5.0e-05
  # PostProcess: DBPostProcess thresholds and limits applied to model output.
  PostProcess:
    name: DBPostProcess
    thresh: 0.3
    box_thresh: 0.6
    max_candidates: 1000
    unclip_ratio: 1.5
  # Metric: DetMetric, with `hmean` named as the main indicator.
  Metric:
    name: DetMetric
    main_indicator: hmean
  # Train: training dataset (source files plus an ordered transform pipeline)
  # and the data-loader settings used to batch it.
  Train:
    dataset:
      name: TextDetDataset
      data_dir: datasets/ICDAR2015
      label_file_list:
        - datasets/ICDAR2015/train.txt
      # Transforms are listed in order; each item is a single-key mapping of
      # transform name -> arguments (null when no arguments are given).
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null
        - CopyPaste: null
        - IaaAugment:
            augmenter_args:
              - type: Fliplr
                args:
                  p: 0.5
              - type: Affine
                args:
                  rotate:
                    - -10
                    - 10
              - type: Resize
                args:
                  size:
                    - 0.5
                    - 3
        - EastRandomCropData:
            size:
              - 960
              - 960
            max_tries: 50
            keep_ratio: true
        - MakeBorderMap:
            shrink_ratio: 0.4
            thresh_min: 0.3
            thresh_max: 0.7
        - MakeShrinkMap:
            shrink_ratio: 0.4
            min_text_size: 8
        - NormalizeImage:
            # `1./255.` is not a YAML number, so it loads as a string.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: 1./255.
            mean:
              - 0.485
              - 0.456
              - 0.406
            std:
              - 0.229
              - 0.224
              - 0.225
            order: hwc
        - ToCHWImage: null
        - KeepKeys:
            keep_keys:
              - image
              - threshold_map
              - threshold_mask
              - shrink_map
              - shrink_mask
    loader:
      shuffle: true
      drop_last: false
      batch_size_per_card: 8
      num_workers: 4
  # Eval: evaluation dataset (source files plus an ordered transform pipeline)
  # and the data-loader settings used to batch it.
  Eval:
    dataset:
      name: TextDetDataset
      data_dir: datasets/ICDAR2015
      label_file_list:
        - datasets/ICDAR2015/val.txt
      # Each item is a single-key mapping of transform name -> arguments
      # (null when no arguments are given).
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - DetLabelEncode: null  # Class handling label
        - DetResizeForTest: null
        - NormalizeImage:
            # `1./255.` is not a YAML number, so it loads as a string.
            # NOTE(review): presumably evaluated by the consumer — confirm.
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 1  # must be 1
      num_workers: 2
  # No profiler options are set (explicit null).
  profiler_options: null