# ch_SVTRv2_rec.yaml (3.1 KB)

  # Run-wide settings: device, schedule length, logging/saving cadence,
  # checkpoint and dictionary paths.
  # NOTE(review): nesting was rebuilt from a flattened, numbered listing;
  # confirm against the upstream config before training.
  Global:
    debug: false
    use_gpu: true
    epoch_num: 200
    log_smooth_window: 20
    print_batch_step: 10
    save_model_dir: ./output/rec_svtrv2_ch
    save_epoch_step: 10
    eval_batch_step: [0, 1000]
    cal_metric_during_train: false
    pretrained_model: https://paddleocr.bj.bcebos.com/pretrained/ch_SVTRv2_rec_server_trained.pdparams
    # Left empty on purpose: both keys load as null.
    checkpoints:
    save_inference_dir:
    use_visualdl: false
    infer_img: doc/imgs_words/ch/word_1.jpg
    character_dict_path: ppocr/utils/ppocr_keys_v1.txt
    # Anchored so that NRTRHead.max_text_length can alias the same value.
    max_text_length: &max_text_length 25
    infer_mode: false
    use_space_char: true
    distributed: true
    # NOTE(review): "svrtv2" looks like a transposition of "svtrv2"; the path is
    # kept byte-identical because downstream scripts may read it.
    save_res_path: ./output/rec/predicts_svrtv2.txt
  # Optimizer (AdamW) and its learning-rate schedule (Cosine with warm-up).
  # NOTE(review): nesting was rebuilt from a flattened, numbered listing;
  # confirm against the upstream config before training.
  Optimizer:
    name: AdamW
    beta1: 0.9
    beta2: 0.999
    # Keep the dot in the mantissa: YAML 1.1 loaders such as PyYAML read a
    # dot-less `1e-8` as a string rather than a float.
    epsilon: 1.e-8
    weight_decay: 0.05
    no_weight_decay_name: norm
    one_dim_param_no_weight_decay: true
    lr:
      name: Cosine
      learning_rate: 0.001  # 8gpus 192bs
      warmup_epoch: 5
  # Recognition model: SVTRv2 backbone feeding a MultiHead with a CTC branch
  # and an NRTR branch.
  # NOTE(review): nesting (in particular Neck/Head under CTCHead) was rebuilt
  # from a flattened, numbered listing; confirm against the upstream config.
  Architecture:
    model_type: rec
    algorithm: SVTR_HGNet
    # Left empty on purpose: loads as null.
    Transform:
    Backbone:
      name: SVTRv2
      use_pos_embed: false
      # dims, depths, num_heads, mixer, local_k and sub_k each carry one entry
      # per stage (three stages).
      dims: [128, 256, 384]
      depths: [6, 6, 6]
      num_heads: [4, 8, 12]
      # One row per stage, one mixer name per block (6 per stage, matching depths).
      mixer:
        - ['Conv', 'Conv', 'Conv', 'Conv', 'Conv', 'Conv']
        - ['Conv', 'Conv', 'Global', 'Global', 'Global', 'Global']
        - ['Global', 'Global', 'Global', 'Global', 'Global', 'Global']
      local_k: [[5, 5], [5, 5], [-1, -1]]
      sub_k: [[2, 1], [2, 1], [-1, -1]]
      last_stage: false
      use_pool: true
    Head:
      name: MultiHead
      head_list:
        - CTCHead:
            Neck:
              name: svtr
              dims: 256
              depth: 2
              hidden_dims: 256
              kernel_size: [1, 3]
              use_guide: true
            Head:
              fc_decay: 0.00001
        - NRTRHead:
            nrtr_dim: 384
            # Alias of the &max_text_length anchor defined under Global.
            max_text_length: *max_text_length
            num_decoder_layers: 2
  # Combined loss; the entries match the CTC and NRTR heads by name.
  # Each entry has an empty value (null), i.e. no per-loss options are set here.
  Loss:
    name: MultiLoss
    loss_config_list:
      - CTCLoss:
      - NRTRLoss:
  # Decoder applied to model output; only the decoder name is configured.
  PostProcess:
    name: CTCLabelDecode
  # Evaluation metric and the indicator named as the main one.
  Metric:
    name: RecMetric
    main_indicator: acc
  # Training data pipeline: dataset and transforms, multi-scale sampler, loader.
  # NOTE(review): nesting (dataset / sampler / loader as siblings) was rebuilt
  # from a flattened, numbered listing; confirm against the upstream config.
  Train:
    dataset:
      name: MultiScaleDataSet
      ds_width: false
      data_dir: ./train_data/
      ext_op_transform_idx: 1
      label_file_list:
        - ./train_data/train_list.txt
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        # Empty value (null): no options are set for RecAug here.
        - RecAug:
        - MultiLabelEncode:
            gtc_encode: NRTRLabelEncode
        - KeepKeys:
            keep_keys:
              - image
              - label_ctc
              - label_gtc
              - length
              - valid_ratio
    sampler:
      name: MultiScaleSampler
      scales: [[320, 32], [320, 48], [320, 64]]
      # Anchored so loader.batch_size_per_card below reuses the same number.
      first_bs: &bs 192
      fix_bs: false
      divided_factor: [8, 16]  # w, h
      is_training: true
    loader:
      shuffle: true
      batch_size_per_card: *bs
      drop_last: true
      num_workers: 8
  # Evaluation data pipeline: fixed-shape resize, no shuffling, no dropped batch.
  # NOTE(review): nesting was rebuilt from a flattened, numbered listing;
  # confirm against the upstream config.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./train_data
      label_file_list:
        - ./train_data/val_list.txt
      transforms:
        - DecodeImage:
            img_mode: BGR
            channel_first: false
        - MultiLabelEncode:
            gtc_encode: NRTRLabelEncode
        - RecResizeImg:
            image_shape: [3, 48, 320]
        - KeepKeys:
            keep_keys:
              - image
              - label_ctc
              - label_gtc
              - length
              - valid_ratio
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 128
      num_workers: 4