# PP-FormulaNet_plus-M.yaml (3.0 KB)
  # Run-wide settings. The anchors defined here (&rec_char_dict_path,
  # &max_new_tokens, &input_size) are reused by later sections through aliases.
  Global:
    model_name: PP-FormulaNet_plus-M # To use static model for inference.
    use_gpu: True
    epoch_num: 20
    log_smooth_window: 10
    print_batch_step: 10
    save_model_dir: ./output/rec/pp_formulanet_plus_m/
    save_epoch_step: 2
    # Evaluation is run every 179 iterations (1 epoch) (batch_size = 56).
    # NOTE(review): Train.loader.batch_size_per_card is 14, so 56 presumably
    # assumes 4 cards - confirm, and recompute 179 for other setups.
    # NOTE(review): the original comment also carried "max_seq_len: 1024", which
    # does not match max_new_tokens (2560) below - looks stale; confirm.
    eval_batch_step: [0, 179]
    cal_metric_during_train: True
    # The next three keys are intentionally left without a value (parsed as null).
    pretrained_model:
    checkpoints:
    save_inference_dir:
    use_visualdl: False
    infer_img: doc/datasets/pme_demo/0000013.png
    infer_mode: False
    use_space_char: False
    rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
    max_new_tokens: &max_new_tokens 2560
    input_size: &input_size [384, 384]
    save_res_path: ./output/rec/predicts_pp_formulanet_plus_m.txt
    allow_resize_largeImg: False
    start_ema: True
    # NOTE(review): leading dimension is 1 while Architecture.in_channels is 3 -
    # confirm this is intended.
    d2s_train_image_shape: [1,384,384]
  # Optimizer settings; the learning-rate schedule is a nested mapping under `lr`.
  Optimizer:
    name: AdamW
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.05
    lr:
      name: LinearWarmupCosine
      learning_rate: 0.0001
  # Model definition: an (empty) Transform, a Backbone, and a decoder Head.
  Architecture:
    model_type: rec
    algorithm: PP-FormulaNet_plus-M
    in_channels: 3
    # No value set (parsed as null).
    Transform:
    Backbone:
      name: PPHGNetV2_B6_Formula
      class_num: 1024
    Head:
      name: PPFormulaNet_Head
      # Alias of Global.max_new_tokens.
      max_new_tokens: *max_new_tokens
      decoder_start_token_id: 0
      decoder_ffn_dim: 2048
      decoder_hidden_size: 512
      decoder_layers: 6
      temperature: 0.2
      do_sample: False
      top_p: 0.95
      encoder_hidden_size: 2048
      is_export: False
      length_aware: False
      use_parallel: False
      parallel_step: 0
  # Loss selection.
  # NOTE(review): the loss name carries an "_L_" infix while the model is the
  # "-M" variant - presumably shared between variants; confirm.
  Loss:
    name: PPFormulaNet_L_Loss
  # Post-processing of model output; uses the same dictionary path as Global
  # through the *rec_char_dict_path alias.
  PostProcess:
    name: UniMERNetDecode
    rec_char_dict_path: *rec_char_dict_path
  # Evaluation metric; `main_indicator` names the metric field treated as primary.
  Metric:
    name: LaTeXOCRMetric
    main_indicator: exp_rate
    cal_bleu_score: True
  # Training data pipeline: dataset source, ordered transform list, and loader.
  Train:
    dataset:
      name: SimpleDataSet
      data_dir: ./ocr_rec_latexocr_dataset_example
      label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
      # Each list item is a single-key mapping: transform name -> its arguments
      # (an empty value means the transform takes no arguments here).
      transforms:
        - UniMERNetImgDecode:
            input_size: *input_size
            random_padding: True
            random_resize: True
            random_crop: True
        - UniMERNetTrainTransform:
        - LatexImageFormat:
        - UniMERNetLabelEncode:
            rec_char_dict_path: *rec_char_dict_path
            max_seq_len: *max_new_tokens
        - KeepKeys:
            keep_keys: ['image', 'label', 'attention_mask']
    loader:
      # NOTE(review): shuffling is disabled for the training loader - confirm intended.
      shuffle: False
      drop_last: False
      batch_size_per_card: 14
      num_workers: 0
      collate_fn: UniMERNetCollator
  # Evaluation data pipeline: dataset source, ordered transform list, and loader.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./ocr_rec_latexocr_dataset_example
      label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
      # Each list item is a single-key mapping: transform name -> its arguments
      # (an empty value means the transform takes no arguments here).
      transforms:
        - UniMERNetImgDecode:
            input_size: *input_size
        - UniMERNetTestTransform:
        - LatexImageFormat:
        - UniMERNetLabelEncode:
            max_seq_len: *max_new_tokens
            rec_char_dict_path: *rec_char_dict_path
        - KeepKeys:
            # 'filename' is kept here in addition to the keys kept for training.
            keep_keys: ['image', 'label', 'attention_mask', 'filename']
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 30
      num_workers: 0
      collate_fn: UniMERNetCollator