# LaTeX_OCR_rec.yaml (2.9 KB)

# (Line-number gutter 1-129 from the original file listing omitted.)
  # Run-wide settings: device, schedule lengths, logging cadence, and the
  # paths used for checkpoints, inference input and prediction output.
  Global:
    use_gpu: True
    epoch_num: 500
    log_smooth_window: 20
    print_batch_step: 100
    save_model_dir: ./output/rec/latex_ocr/
    save_epoch_step: 5
    max_seq_len: 512
    # evaluation is run every 60000 iterations (22 epoch)(batch_size = 56)
    # NOTE(review): Train.dataset.batch_size_per_pair is 40 in this file, not 56,
    # so the "22 epoch" estimate above may not hold for this setup - confirm.
    eval_batch_step: [0, 60000]
    cal_metric_during_train: True
    # The three keys below are intentionally left empty (parsed as null).
    pretrained_model:
    checkpoints:
    save_inference_dir:
    use_visualdl: False
    infer_img: doc/datasets/pme_demo/0000013.png
    infer_mode: False
    use_space_char: False
    rec_char_dict_path: ppocr/utils/dict/latex_ocr_tokenizer.json
    save_res_path: ./output/rec/predicts_latexocr.txt
    d2s_train_image_shape: [1,256,256]
    find_unused_parameters: True
  # Optimizer selection and its learning-rate settings.
  Optimizer:
    name: AdamW
    beta1: 0.9
    beta2: 0.999
    # Learning-rate policy is a nested mapping: policy name plus its value.
    lr:
      name: Const
      learning_rate: 0.0001
  # Model definition: an (empty) Transform stage, a Backbone and a Head,
  # each given as a nested mapping of constructor-style options.
  Architecture:
    model_type: rec
    algorithm: LaTeXOCR
    in_channels: 1
    # Left empty (null): no transform stage is configured.
    Transform:
    Backbone:
      name: HybridTransformer
      img_size: [192, 672]
      patch_size: 16
      num_classes: 0
      embed_dim: 256
      depth: 4
      num_heads: 8
      input_channel: 1
      is_predict: False
      is_export: False
    Head:
      name: LaTeXOCRHead
      pad_value: 0
      is_export: False
      # Boolean switches forwarded to the decoder; exact semantics are
      # defined by the head implementation, not by this file.
      decoder_args:
        attn_on_attn: True
        cross_attend: True
        ff_glu: True
        rel_pos_bias: False
        use_scalenorm: False
  # Loss function selected by name.
  Loss:
    name: LaTeXOCRLoss
  # Output decoding step; uses the same tokenizer file path as
  # Global.rec_char_dict_path.
  PostProcess:
    name: LaTeXOCRDecode
    rec_char_dict_path: ppocr/utils/dict/latex_ocr_tokenizer.json
  # Evaluation metric; main_indicator names the score treated as primary.
  Metric:
    name: LaTeXOCRMetric
    main_indicator: exp_rate
    cal_bleu_score: True
  # Training data pipeline: dataset source, ordered transform list, and
  # data-loader options.
  Train:
    dataset:
      name: LaTeXOCRDataSet
      data: ./train_data/LaTeXOCR/latexocr_train.pkl
      min_dimensions: [32, 32]
      max_dimensions: [672, 192]
      batch_size_per_pair: 40
      keep_smaller_batches: False
      # Transforms are a sequence; each item is a single-key mapping whose
      # value holds that transform's options (empty means no options).
      transforms:
        - DecodeImage:
            channel_first: False
        - MinMaxResize:
            min_dimensions: [32, 32]
            max_dimensions: [672, 192]
        - LatexTrainTransform:
            bitmap_prob: .04
        - NormalizeImage:
            mean: [0.7931, 0.7931, 0.7931]
            std: [0.1738, 0.1738, 0.1738]
            order: 'hwc'
        - LatexImageFormat:
        - KeepKeys:
            keep_keys: ['image']
    loader:
      shuffle: True
      # NOTE(review): 1 here alongside dataset.batch_size_per_pair above;
      # presumably the dataset already yields whole batches - confirm.
      batch_size_per_card: 1
      drop_last: False
      num_workers: 0
      collate_fn: LaTeXOCRCollator
  # Evaluation data pipeline: dataset source, ordered transform list, and
  # data-loader options (no shuffling).
  Eval:
    dataset:
      name: LaTeXOCRDataSet
      data: ./train_data/LaTeXOCR/latexocr_val.pkl
      min_dimensions: [32, 32]
      max_dimensions: [672, 192]
      batch_size_per_pair: 10
      keep_smaller_batches: True
      # Transforms are a sequence; each item is a single-key mapping whose
      # value holds that transform's options (empty means no options).
      transforms:
        - DecodeImage:
            img_mode: RGB
            channel_first: False
        - MinMaxResize:
            min_dimensions: [32, 32]
            max_dimensions: [672, 192]
        - LatexTestTransform:
        - NormalizeImage:
            mean: [0.7931, 0.7931, 0.7931]
            std: [0.1738, 0.1738, 0.1738]
            order: 'hwc'
        - LatexImageFormat:
        - KeepKeys:
            keep_keys: ['image']
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 1
      num_workers: 0
      collate_fn: LaTeXOCRCollator