瀏覽代碼

add_ppformulanet_plus

liuhongen1234567 6 月之前
父節點
當前提交
08531c79eb

+ 22 - 0
docs/module_usage/tutorials/ocr_modules/formula_recognition.en.md

@@ -39,6 +39,28 @@ The formula recognition module is a crucial component of OCR (Optical Character
 <td>-/-</td>
 <td>535.2 M</td>
 <tr>
+<td>PP-FormulaNet_plus-S</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-S_infer.tar">Inference Model</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-S_pretrained.pdparams">Training Model</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+<td rowspan="3">- </td>
+</tr>
+<tr>
+<td>PP-FormulaNet_plus-M</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-M_infer.tar">Inference Model</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-M_pretrained.pdparams">Training Model</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+</tr>
+<tr>
+<td>PP-FormulaNet_plus-L</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-L_infer.tar">Inference Model</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-L_pretrained.pdparams">Training Model</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+</tr>
+<tr>
 <td>LaTeX_OCR_rec</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/LaTeX_OCR_rec_infer.tar">Inference Model</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/LaTeX_OCR_rec_pretrained.pdparams">Training Model</a></td>
 <td>71.63</td>
 <td>-/-</td>

+ 22 - 0
docs/module_usage/tutorials/ocr_modules/formula_recognition.md

@@ -39,6 +39,28 @@ comments: true
 <td>-/-</td>
 <td>535.2 M</td>
 <tr>
+<td>PP-FormulaNet_plus-S</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-S_infer.tar">推理模型</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-S_pretrained.pdparams">训练模型</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+<td rowspan="3">- </td>
+</tr>
+<tr>
+<td>PP-FormulaNet_plus-M</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-M_infer.tar">推理模型</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-M_pretrained.pdparams">训练模型</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+</tr>
+<tr>
+<td>PP-FormulaNet_plus-L</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-L_infer.tar">推理模型</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-L_pretrained.pdparams">训练模型</a></td>
+<td>-</td>
+<td>-/-</td>
+<td>-/-</td>
+<td>-</td>
+</tr>
+<tr>
 <td>LaTeX_OCR_rec</td><td><a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/LaTeX_OCR_rec_infer.tar">推理模型</a>/<a href="https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/LaTeX_OCR_rec_pretrained.pdparams">训练模型</a></td>
 <td>71.63</td>
 <td>-/-</td>

+ 40 - 0
paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-FormulaNet_plus-L
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "./dataset/ocr_rec_latexocr_dataset_example"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: FormulaRecDataset
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  epochs_iters: 20
+  batch_size: 3
+  learning_rate: 0.0001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-L_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  batch_size: 3
+  weight_path: output/best_accuracy/best_accuracy.pdparams
+  log_interval: 1
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-L_pretrained.pdparams
+
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_formula_rec_001.png"
+  kernel_option:
+    run_mode: paddle

+ 40 - 0
paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-FormulaNet_plus-M
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "./dataset/ocr_rec_latexocr_dataset_example"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: FormulaRecDataset
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  epochs_iters: 20
+  batch_size: 14
+  learning_rate: 0.0001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-M_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  batch_size: 20
+  weight_path: output/best_accuracy/best_accuracy.pdparams
+  log_interval: 1
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-M_pretrained.pdparams
+
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_formula_rec_001.png"
+  kernel_option:
+    run_mode: paddle

+ 40 - 0
paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml

@@ -0,0 +1,40 @@
+Global:
+  model: PP-FormulaNet_plus-S
+  mode: check_dataset # check_dataset/train/evaluate/predict
+  dataset_dir: "./dataset/ocr_rec_latexocr_dataset_example"
+  device: gpu:0,1,2,3
+  output: "output"
+
+CheckDataset:
+  convert: 
+    enable: False
+    src_dataset_type: FormulaRecDataset
+  split: 
+    enable: False
+    train_percent: null
+    val_percent: null
+
+Train:
+  epochs_iters: 20
+  batch_size: 14
+  learning_rate: 0.0001
+  pretrain_weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-S_pretrained.pdparams
+  resume_path: null
+  log_interval: 20
+  eval_interval: 1
+  save_interval: 1
+
+Evaluate:
+  batch_size: 20
+  weight_path: output/best_accuracy/best_accuracy.pdparams
+  log_interval: 1
+
+Export:
+  weight_path: https://paddle-model-ecology.bj.bcebos.com/paddlex/official_pretrained_model/PP-FormulaNet_plus-S_pretrained.pdparams
+
+Predict:
+  batch_size: 1
+  model_dir: "output/best_accuracy/inference"
+  input: "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/general_formula_rec_001.png"
+  kernel_option:
+    run_mode: paddle

+ 1 - 1
paddlex/configs/pipelines/formula_recognition.yaml

@@ -17,7 +17,7 @@ SubModules:
 
   FormulaRecognition:
     module_name: formula_recognition
-    model_name: PP-FormulaNet-L
+    model_name: PP-FormulaNet_plus-L
     model_dir: null
     batch_size: 5
 

+ 7 - 1
paddlex/inference/models/formula_recognition/predictor.py

@@ -97,7 +97,13 @@ class FormulaRecPredictor(BasePredictor):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["UniMERNetImageFormat"](imgs=batch_imgs)
-        elif self.model_name in ("PP-FormulaNet-S", "PP-FormulaNet-L"):
+        elif self.model_name in (
+            "PP-FormulaNet-S",
+            "PP-FormulaNet-L",
+            "PP-FormulaNet_plus-S",
+            "PP-FormulaNet_plus-M",
+            "PP-FormulaNet_plus-L",
+        ):
             batch_imgs = self.pre_tfs["UniMERNetImgDecode"](imgs=batch_raw_imgs)
             batch_imgs = self.pre_tfs["UniMERNetTestTransform"](imgs=batch_imgs)
             batch_imgs = self.pre_tfs["LatexImageFormat"](imgs=batch_imgs)

+ 31 - 3
paddlex/inference/models/formula_recognition/processors.py

@@ -842,8 +842,27 @@ class UniMERNetDecode(object):
         text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
         letter = "[a-zA-Z]"
         noletter = "[\W_^\d]"
-        names = [x[0].replace(" ", "") for x in re.findall(text_reg, s)]
-        s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
+        names = []
+        for x in re.findall(text_reg, s):
+            pattern = r"\\[a-zA-Z]+"
+            pattern = r"(\\[a-zA-Z]+)\s(?=\w)|\\[a-zA-Z]+\s(?=})"
+            matches = re.findall(pattern, x[0])
+            for m in matches:
+                if (
+                    m
+                    not in [
+                        "\\operatorname",
+                        "\\mathrm",
+                        "\\text",
+                        "\\mathbf",
+                    ]
+                    and m.strip() != ""
+                ):
+                    s = s.replace(m, m + "XXXXXXX")
+                    s = s.replace(" ", "")
+                    names.append(s)
+        if len(names) > 0:
+            s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
         news = s
         while True:
             s = news
@@ -852,7 +871,16 @@ class UniMERNetDecode(object):
             news = re.sub(r"(%s)\s+?(%s)" % (letter, noletter), r"\1\2", news)
             if news == s:
                 break
-        return s
+        return s.replace("XXXXXXX", " ")
+
+    def remove_chinese_text_wrapping(self, formula):  # Unwrap \text{...} groups whose body contains CJK characters. NOTE(review): a def with the same name and regex follows right after this hunk; the later one wins, so one copy looks redundant.
+        pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")  # \text{ ... } with at least one U+4E00-U+9FFF character inside
+
+        def replacer(match):
+            return match.group(1)  # keep only the captured inner text, dropping the \text{} wrapper
+
+        replaced_formula = pattern.sub(replacer, formula)
+        return replaced_formula.replace('"', "")  # additionally strips every double-quote character
 
     def remove_chinese_text_wrapping(self, formula):
         pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")

+ 17 - 2
paddlex/inference/models/formula_recognition/result.py

@@ -15,6 +15,7 @@
 import copy
 import math
 import os
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -126,6 +127,7 @@ def get_align_equation(equation: str) -> str:
     """
     is_align = False
     equation = str(equation) + "\n"
+
     begin_dict = [
         r"begin{align}",
         r"begin{align*}",
@@ -147,6 +149,17 @@ def get_align_equation(equation: str) -> str:
     return equation
 
 
+def add_text_for_zh_formula(formula: str) -> str:  # Wrap every run of non-ASCII characters in \text{...} and return the new string.
+    pattern = re.compile(r"([^\x00-\x7F]+)")  # one or more consecutive characters outside 0x00-0x7F
+
+    def replacer(match):
+        return f"\\text{{{match.group(1)}}}"  # doubled braces emit literal { } around the captured run
+
+    replaced_formula = pattern.sub(replacer, formula)
+
+    return replaced_formula
+
+
 def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     Generates a LaTeX file containing a specific equation.
@@ -161,17 +174,19 @@ def generate_tex_file(tex_file_path: str, equation: str) -> None:
     """
     with custom_open(tex_file_path, "w") as fp:
         start_template = (
-            r"\documentclass{article}" + "\n"
+            r"\documentclass[varwidth]{standalone}" + "\n"
             r"\usepackage{cite}" + "\n"
             r"\usepackage{amsmath,amssymb,amsfonts,upgreek}" + "\n"
             r"\usepackage{graphicx}" + "\n"
             r"\usepackage{textcomp}" + "\n"
+            r"\usepackage{xeCJK}" + "\n"
             r"\DeclareMathSizes{14}{14}{9.8}{7}" + "\n"
             r"\pagestyle{empty}" + "\n"
             r"\begin{document}" + "\n"
             r"\begin{large}" + "\n"
         )
         fp.write(start_template)
+        equation = add_text_for_zh_formula(equation)
         equation = get_align_equation(equation)
         fp.write(equation)
         end_template = r"\end{large}" + "\n" r"\end{document}" + "\n"
@@ -197,7 +212,7 @@ def generate_pdf_file(
                         and None if an error occurred during the pdflatex execution.
     """
     if os.path.exists(tex_path):
-        command = "pdflatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
+        command = "xelatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
             pdf_dir, tex_path
         )
         if is_debug:

+ 3 - 0
paddlex/inference/utils/official_models.py

@@ -250,6 +250,9 @@ PP-LCNet_x1_0_vehicle_attribute_infer.tar",
     "UniMERNet": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/UniMERNet_infer.tar",
     "PP-FormulaNet-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet-S_infer.tar",
     "PP-FormulaNet-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet-L_infer.tar",
+    "PP-FormulaNet_plus-S": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-S_infer.tar",
+    "PP-FormulaNet_plus-M": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-M_infer.tar",
+    "PP-FormulaNet_plus-L": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/PP-FormulaNet_plus-L_infer.tar",
     "FasterRCNN-ResNet34-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/FasterRCNN-ResNet34-FPN_infer.tar",
     "FasterRCNN-ResNet50": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/FasterRCNN-ResNet50_infer.tar",
     "FasterRCNN-ResNet50-FPN": "https://paddle-model-ecology.bj.bcebos.com/paddlex/official_inference_model/paddle3.0.0/FasterRCNN-ResNet50-FPN_infer.tar",

+ 3 - 0
paddlex/modules/formula_recognition/evaluator.py

@@ -36,6 +36,9 @@ class FormulaRecEvaluator(BaseEvaluator):
             "UniMERNet",
             "PP-FormulaNet-L",
             "PP-FormulaNet-S",
+            "PP-FormulaNet_plus-L",
+            "PP-FormulaNet_plus-M",
+            "PP-FormulaNet_plus-S",
         ):
             self.pdx_config.update_dataset(
                 self.global_config.dataset_dir, "SimpleDataSet"

+ 3 - 0
paddlex/modules/formula_recognition/model_list.py

@@ -17,4 +17,7 @@ MODELS = [
     "UniMERNet",
     "PP-FormulaNet-S",
     "PP-FormulaNet-L",
+    "PP-FormulaNet_plus-S",
+    "PP-FormulaNet_plus-M",
+    "PP-FormulaNet_plus-L",
 ]

+ 3 - 0
paddlex/modules/formula_recognition/trainer.py

@@ -53,6 +53,9 @@ class FormulaRecTrainer(BaseTrainer):
             "UniMERNet",
             "PP-FormulaNet-L",
             "PP-FormulaNet-S",
+            "PP-FormulaNet_plus-L",
+            "PP-FormulaNet_plus-M",
+            "PP-FormulaNet_plus-S",
         ):
             self.pdx_config.update_dataset(
                 self.global_config.dataset_dir, "SimpleDataSet"

+ 1 - 1
paddlex/repo_apis/PaddleOCR_api/configs/PP-FormulaNet-S.yaml

@@ -38,7 +38,7 @@ Architecture:
   in_channels: 3
   Transform:
   Backbone:
-    name: PPHGNetV2_B4
+    name: PPHGNetV2_B4_Formula
     class_num: 1024
 
   Head:

+ 122 - 0
paddlex/repo_apis/PaddleOCR_api/configs/PP-FormulaNet_plus-L.yaml

@@ -0,0 +1,122 @@
+Global:
+  model_name: PP-FormulaNet_plus-L # To use static model for inference.
+  use_gpu: True
+  epoch_num: 10
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_l/
+  save_epoch_step: 2
+  # evaluation is run every 417 iterations (1 epoch) (batch_size = 24)   # max_seq_len: 2560
+  eval_batch_step: [0,  417 ]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 2560
+  input_size: &input_size [768, 768]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_l.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,768,768]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-L
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: Vary_VIT_B_Formula
+    image_size: 768 
+    encoder_embed_dim: 768
+    encoder_depth: 12
+    encoder_num_heads: 12
+    encoder_global_attn_indexes: [2, 5, 8, 11]
+  Head:
+    name: PPFormulaNet_Head
+    max_new_tokens: *max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 2048
+    decoder_hidden_size: 512
+    decoder_layers: 8
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95 
+    encoder_hidden_size: 1024
+    is_export: False
+    length_aware: False 
+    use_parallel: False
+    parallel_step: 0
+
+Loss:
+  name: PPFormulaNet_L_Loss
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path:  *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len:  *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 3
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len:  *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 10
+    num_workers: 0
+    collate_fn: UniMERNetCollator

+ 119 - 0
paddlex/repo_apis/PaddleOCR_api/configs/PP-FormulaNet_plus-M.yaml

@@ -0,0 +1,119 @@
+Global:
+  model_name: PP-FormulaNet_plus-M # To use static model for inference.
+  use_gpu: True
+  epoch_num: 20
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_m/
+  save_epoch_step: 2
+  # evaluation is run every 179 iterations (1 epoch)(batch_size = 56)   # max_seq_len: 2560
+  eval_batch_step: [0, 179]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path  ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 2560
+  input_size: &input_size [384, 384]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_m.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,384,384]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-M
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: PPHGNetV2_B6_Formula
+    class_num: 1024
+
+  Head:
+    name: PPFormulaNet_Head
+    max_new_tokens: *max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 2048
+    decoder_hidden_size: 512
+    decoder_layers: 6
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95 
+    encoder_hidden_size: 2048
+    is_export: False
+    length_aware: False 
+    use_parallel: False
+    parallel_step: 0
+
+Loss:
+  name: PPFormulaNet_L_Loss
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path: *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len: *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 14
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size:  *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len: *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 30
+    num_workers: 0
+    collate_fn: UniMERNetCollator

+ 120 - 0
paddlex/repo_apis/PaddleOCR_api/configs/PP-FormulaNet_plus-S.yaml

@@ -0,0 +1,120 @@
+Global:
+  model_name: PP-FormulaNet_plus-S # To use static model for inference.
+  use_gpu: True
+  epoch_num: 20
+  log_smooth_window: 10
+  print_batch_step: 10
+  save_model_dir: ./output/rec/pp_formulanet_plus_s/
+  save_epoch_step: 2
+  # evaluation is run every 179 iterations (1 epoch)(batch_size = 56)   # max_seq_len: 1024
+  eval_batch_step: [0, 179]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/datasets/pme_demo/0000013.png
+  infer_mode: False
+  use_space_char: False
+  rec_char_dict_path: &rec_char_dict_path  ppocr/utils/dict/unimernet_tokenizer
+  max_new_tokens: &max_new_tokens 1024
+  input_size: &input_size [384, 384]
+  save_res_path: ./output/rec/predicts_pp_formulanet_plus_s.txt
+  allow_resize_largeImg: False
+  start_ema: True
+  d2s_train_image_shape: [1,384,384]
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.999
+  weight_decay: 0.05
+  lr:
+    name: LinearWarmupCosine
+    learning_rate: 0.0001
+
+Architecture:
+  model_type: rec
+  algorithm: PP-FormulaNet_plus-S
+  in_channels: 3
+  Transform:
+  Backbone:
+    name: PPHGNetV2_B4_Formula
+    class_num: 1024
+
+  Head:
+    name: PPFormulaNet_Head
+    max_new_tokens:  *max_new_tokens
+    decoder_start_token_id: 0
+    decoder_ffn_dim: 1536
+    decoder_hidden_size: 384
+    decoder_layers: 2
+    temperature: 0.2
+    do_sample: False
+    top_p: 0.95
+    encoder_hidden_size: 2048
+    is_export: False
+    length_aware: True
+    # Must be a plain YAML boolean: a trailing comma would turn the value into the string "True,".
+    use_parallel: True
+    parallel_step: 3
+
+Loss:
+  name: PPFormulaNet_S_Loss
+  parallel_step: 3
+
+PostProcess:
+  name:  UniMERNetDecode
+  rec_char_dict_path: *rec_char_dict_path
+
+Metric:
+  name: LaTeXOCRMetric
+  main_indicator:  exp_rate
+  cal_bleu_score: True
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/train.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size: *input_size
+          random_padding: True
+          random_resize:  True
+          random_crop: True 
+      - UniMERNetTrainTransform: 
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          rec_char_dict_path: *rec_char_dict_path
+          max_seq_len: *max_new_tokens
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask']
+
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 14
+    num_workers: 0
+    collate_fn: UniMERNetCollator
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./ocr_rec_latexocr_dataset_example
+    label_file_list: ["./ocr_rec_latexocr_dataset_example/val.txt"]
+    transforms:
+      - UniMERNetImgDecode:
+          input_size:  *input_size
+      - UniMERNetTestTransform:
+      - LatexImageFormat:
+      - UniMERNetLabelEncode:
+          max_seq_len: *max_new_tokens
+          rec_char_dict_path: *rec_char_dict_path
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'attention_mask', 'filename']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 30
+    num_workers: 0
+    collate_fn: UniMERNetCollator

+ 27 - 0
paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py

@@ -70,3 +70,30 @@ register_model_info(
         "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
     }
 )
+
+register_model_info(
+    {
+        "model_name": "PP-FormulaNet_plus-S",
+        "suite": "FormulaRec",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-FormulaNet_plus-S.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-FormulaNet_plus-M",
+        "suite": "FormulaRec",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-FormulaNet_plus-M.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+    }
+)
+
+register_model_info(
+    {
+        "model_name": "PP-FormulaNet_plus-L",
+        "suite": "FormulaRec",
+        "config_path": osp.join(PDX_CONFIG_DIR, "PP-FormulaNet_plus-L.yaml"),
+        "supported_apis": ["train", "evaluate", "predict", "export", "infer"],
+    }
+)