# PP-FormulaNet-L.yaml
#
# Training / evaluation configuration for the PP-FormulaNet-L recognition
# model (model_type: rec). Top-level sections: Global, Optimizer,
# Architecture, Loss, PostProcess, Metric, Train, Eval.
#
# Three values are declared once in Global as YAML anchors and reused through
# aliases in later sections, so change them in Global only:
#   &rec_char_dict_path, &max_new_tokens, &input_size

Global:
  use_gpu: True
  epoch_num: 10
  log_smooth_window: 10
  print_batch_step: 10
  save_model_dir: ./output/rec/pp_formulanet_l/
  save_epoch_step: 2
  # Evaluation is run every 417 iterations (1 epoch) (batch_size = 24); max_seq_len: 1024
  # NOTE(review): 24 presumably is the total batch size across cards
  # (Train.loader.batch_size_per_card is 6) - confirm before changing either value.
  eval_batch_step: [0, 417]
  cal_metric_during_train: True
  # The next three keys are intentionally left empty (null).
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/datasets/pme_demo/0000013.png
  infer_mode: False
  use_space_char: False
  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
  max_new_tokens: &max_new_tokens 1024
  input_size: &input_size [768, 768]
  save_res_path: ./output/rec/predicts_pp_formulanet_l.txt
  allow_resize_largeImg: False
  start_ema: True
  d2s_train_image_shape: [1, 768, 768]

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.05
  lr:
    name: LinearWarmupCosine
    learning_rate: 0.0001

Architecture:
  model_type: rec
  algorithm: PP-FormulaNet-L
  in_channels: 3
  # Transform is left empty (null).
  Transform:
  Backbone:
    name: Vary_VIT_B_Formula
    image_size: 768
    encoder_embed_dim: 768
    encoder_depth: 12
    encoder_num_heads: 12
    encoder_global_attn_indexes: [2, 5, 8, 11]
  Head:
    name: PPFormulaNet_Head
    max_new_tokens: *max_new_tokens
    decoder_start_token_id: 0
    decoder_ffn_dim: 2048
    decoder_hidden_size: 512
    decoder_layers: 8
    temperature: 0.2
    do_sample: False
    top_p: 0.95
    encoder_hidden_size: 1024
    is_export: False
    length_aware: False
    use_parallel: False
    parallel_step: 0

Loss:
  name: PPFormulaNet_L_Loss

PostProcess:
  name: UniMERNetDecode
  rec_char_dict_path: *rec_char_dict_path

Metric:
  name: LaTeXOCRMetric
  main_indicator: exp_rate
  cal_bleu_score: True

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
    # Transforms are listed in order; entries with no arguments are left empty.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTrainTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          rec_char_dict_path: *rec_char_dict_path
          max_seq_len: *max_new_tokens
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 6
    num_workers: 0
    collate_fn: UniMERNetCollator

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./ocr_rec_latexocr_dataset_example
    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
    # Same pipeline shape as Train, but with the test-time transform and an
    # extra 'filename' key kept in each sample.
    transforms:
      - UniMERNetImgDecode:
          input_size: *input_size
      - UniMERNetTestTransform:
      - LatexImageFormat:
      - UniMERNetLabelEncode:
          max_seq_len: *max_new_tokens
          rec_char_dict_path: *rec_char_dict_path
      - KeepKeys:
          keep_keys: ['image', 'label', 'attention_mask', 'filename']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 10
    num_workers: 0
    collate_fn: UniMERNetCollator