# PP-FormulaNet_plus-L.yaml (3.1 KB)
  # Global run settings: device, schedule, logging, checkpoint/output paths, and
  # the shared anchors (&rec_char_dict_path, &max_new_tokens, &input_size) that
  # other sections of this file reference through aliases.
  # NOTE(review): nesting was reconstructed from a flattened listing — confirm
  # against the upstream file.
  Global:
    model_name: PP-FormulaNet_plus-L # To use static model for inference.
    use_gpu: True
    epoch_num: 10
    log_smooth_window: 10
    print_batch_step: 10
    save_model_dir: ./output/rec/pp_formulanet_plus_l/
    save_epoch_step: 2
    # Evaluation runs every 417 iterations (noted upstream as 1 epoch at batch_size = 24).
    # NOTE(review): the original comment also mentions "max_seq_len: 1024", while
    # max_new_tokens below is 2560 — confirm which value the 417-step figure assumes.
    eval_batch_step: [0, 417]
    cal_metric_during_train: True
    # The next three keys are intentionally left empty (null).
    pretrained_model:
    checkpoints:
    save_inference_dir:
    use_visualdl: False
    infer_img: doc/datasets/pme_demo/0000013.png
    infer_mode: False
    use_space_char: False
    # Anchors reused by the Head, PostProcess, Train and Eval sections.
    rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
    max_new_tokens: &max_new_tokens 2560
    input_size: &input_size [768, 768]
    save_res_path: ./output/rec/predicts_pp_formulanet_plus_l.txt
    allow_resize_largeImg: False
    start_ema: True
    d2s_train_image_shape: [1, 768, 768]
  # Optimizer settings: AdamW with a LinearWarmupCosine learning-rate schedule.
  # NOTE(review): nesting was reconstructed from a flattened listing; `lr` must be
  # a sub-mapping because `name` would otherwise be a duplicate key. Placement of
  # `learning_rate` under `lr` is presumed — confirm against the upstream file.
  Optimizer:
    name: AdamW
    beta1: 0.9
    beta2: 0.999
    weight_decay: 0.05
    lr:
      name: LinearWarmupCosine
      learning_rate: 0.0001
  # Model definition: a recognition ("rec") model made of a Backbone and a Head;
  # Transform is left empty (null).
  # NOTE(review): nesting was reconstructed from a flattened listing — the split
  # of keys between Backbone and Head follows the encoder_*/decoder_* naming and
  # the position of the two `name` keys; confirm against the upstream file.
  Architecture:
    model_type: rec
    algorithm: PP-FormulaNet_plus-L
    in_channels: 3
    Transform:
    Backbone:
      name: Vary_VIT_B_Formula
      image_size: 768
      encoder_embed_dim: 768
      encoder_depth: 12
      encoder_num_heads: 12
      encoder_global_attn_indexes: [2, 5, 8, 11]
    Head:
      name: PPFormulaNet_Head
      # Alias of the max_new_tokens anchor declared in the Global section.
      max_new_tokens: *max_new_tokens
      decoder_start_token_id: 0
      decoder_ffn_dim: 2048
      decoder_hidden_size: 512
      decoder_layers: 8
      temperature: 0.2
      do_sample: False
      top_p: 0.95
      # NOTE(review): differs from Backbone.encoder_embed_dim (768) — presumably the
      # backbone projects its output to this width; verify.
      encoder_hidden_size: 1024
      is_export: False
      length_aware: False
      use_parallel: False
      parallel_step: 0
  # Loss used for training, selected by name.
  Loss:
    name: PPFormulaNet_L_Loss
  # Post-processing step, selected by name; it receives the tokenizer path through
  # the rec_char_dict_path alias (anchor declared in the Global section).
  PostProcess:
    name: UniMERNetDecode
    rec_char_dict_path: *rec_char_dict_path
  # Evaluation metric; exp_rate is the main indicator and BLEU calculation is enabled.
  Metric:
    name: LaTeXOCRMetric
    main_indicator: exp_rate
    cal_bleu_score: True
  # Training data pipeline: dataset location, the ordered list of transforms, and
  # the loader settings.
  # NOTE(review): nesting was reconstructed from a flattened listing (dataset and
  # loader as siblings, transform parameters under their list item) — confirm
  # against the upstream file.
  Train:
    dataset:
      name: SimpleDataSet
      data_dir: ./ocr_rec_latexocr_dataset_example
      label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
      transforms:
        - UniMERNetImgDecode:
            # input_size aliases the anchor declared in the Global section.
            input_size: *input_size
            random_padding: True
            random_resize: True
            random_crop: True
        - UniMERNetTrainTransform:
        - LatexImageFormat:
        - UniMERNetLabelEncode:
            rec_char_dict_path: *rec_char_dict_path
            max_seq_len: *max_new_tokens
        - KeepKeys:
            keep_keys: ['image', 'label', 'attention_mask']
    loader:
      # NOTE(review): shuffle is disabled for the training loader — confirm this is intended.
      shuffle: False
      drop_last: False
      batch_size_per_card: 3
      num_workers: 0
      collate_fn: UniMERNetCollator
  # Evaluation data pipeline: dataset location, the ordered list of transforms, and
  # the loader settings. Unlike the Train section in this file, the image decode
  # step sets only input_size and KeepKeys additionally keeps 'filename'.
  # NOTE(review): nesting was reconstructed from a flattened listing (dataset and
  # loader as siblings, transform parameters under their list item) — confirm
  # against the upstream file.
  Eval:
    dataset:
      name: SimpleDataSet
      data_dir: ./ocr_rec_latexocr_dataset_example
      label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
      transforms:
        - UniMERNetImgDecode:
            # input_size aliases the anchor declared in the Global section.
            input_size: *input_size
        - UniMERNetTestTransform:
        - LatexImageFormat:
        - UniMERNetLabelEncode:
            max_seq_len: *max_new_tokens
            rec_char_dict_path: *rec_char_dict_path
        - KeepKeys:
            keep_keys: ['image', 'label', 'attention_mask', 'filename']
    loader:
      shuffle: False
      drop_last: False
      batch_size_per_card: 10
      num_workers: 0
      collate_fn: UniMERNetCollator