소스 검색

fix special issue in formula (#4468)

liuhongen1234567 2 달 전
부모
커밋
9c5f3c9485
2개의 파일 변경: 38줄 추가, 17줄 삭제
  1. 5 5
      paddlex/inference/models/formula_recognition/processors.py
  2. 33 12
      paddlex/inference/models/formula_recognition/result.py

+ 5 - 5
paddlex/inference/models/formula_recognition/processors.py

@@ -337,8 +337,8 @@ class LaTeXOCRDecode(object):
             str: The post-processed LaTeX string.
         """
         text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
-        letter = "[a-zA-Z]"
-        noletter = "[\W_^\d]"
+        letter = r"[a-zA-Z]"
+        noletter = r"[\W_^\d]"
         names = [x[0].replace(" ", "") for x in re.findall(text_reg, s)]
         s = re.sub(text_reg, lambda match: str(names.pop(0)), s)
         news = s
@@ -840,8 +840,8 @@ class UniMERNetDecode(object):
             str: Normalized string.
         """
         text_reg = r"(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})"
-        letter = "[a-zA-Z]"
-        noletter = "[\W_^\d]"
+        letter = r"[a-zA-Z]"
+        noletter = r"[\W_^\d]"
         names = []
         for x in re.findall(text_reg, s):
             pattern = r"\\[a-zA-Z]+"
@@ -874,7 +874,7 @@ class UniMERNetDecode(object):
         return s.replace("XXXXXXX", " ")
 
     def remove_chinese_text_wrapping(self, formula):
-        pattern = re.compile(r"\\text\s*{\s*([^}]*?[\u4e00-\u9fff]+[^}]*?)\s*}")
+        pattern = re.compile(r"\\text\s*{([^{}]*[\u4e00-\u9fff]+[^{}]*)}")
 
         def replacer(match):
             return match.group(1)

+ 33 - 12
paddlex/inference/models/formula_recognition/result.py

@@ -164,18 +164,39 @@ def generate_tex_file(tex_file_path: str, equation: str) -> None:
         equation (str): The LaTeX equation to be written into the file.
     """
     with custom_open(tex_file_path, "w") as fp:
-        start_template = (
-            r"\documentclass[varwidth]{standalone}" + "\n"
-            r"\usepackage{cite}" + "\n"
-            r"\usepackage{amsmath,amssymb,amsfonts,upgreek}" + "\n"
-            r"\usepackage{graphicx}" + "\n"
-            r"\usepackage{textcomp}" + "\n"
-            r"\usepackage{xeCJK}" + "\n"
-            r"\DeclareMathSizes{14}{14}{9.8}{7}" + "\n"
-            r"\pagestyle{empty}" + "\n"
-            r"\begin{document}" + "\n"
-            r"\begin{large}" + "\n"
-        )
+        start_template = r"""
+            \documentclass[varwidth]{standalone}
+            \usepackage{cite}
+            \usepackage{amsmath,amssymb,amsfonts,upgreek}
+            \usepackage{graphicx}
+            \usepackage{textcomp}
+            \usepackage{xeCJK}
+            \DeclareMathSizes{14}{14}{9.8}{7}
+            \pagestyle{empty}
+            \makeatletter
+            \def\x@arrow{\DOTSB\Relbar}
+            \def\xlongequalsignfill@{\arrowfill@\x@arrow\Relbar\x@arrow}
+            \newcommand{\xlongequal}[2][]{\ext@arrow 0099\xlongequalsignfill@{#1}{#2}}
+            \def\xLongleftrightarrowfill@{\arrowfill@\Longleftarrow\Relbar\Longrightarrow}
+            \newcommand{\xLongleftrightarrow}[2][]{\ext@arrow 0099\xLongleftrightarrowfill@{#1}{#2}}
+            \def\xlongleftrightarrowfill@{\arrowfill@\longleftarrow\relbar\longrightarrow}
+            \newcommand{\xlongleftrightarrow}[2][]{\ext@arrow 0099\xlongleftrightarrowfill@{#1}{#2}}
+            \def\xLeftrightarrowfill@{\arrowfill@\Leftarrow\Relbar\Rightarrow}
+            \newcommand{\xLeftrightarrow}[2][]{\ext@arrow 0099\xLeftrightarrowfill@{#1}{#2}}
+            \def\xleftrightarrowfill@{\arrowfill@\leftarrow\relbar\rightarrow}
+            \newcommand{\xleftrightarrow}[2][]{\ext@arrow 0099\xleftrightarrowfill@{#1}{#2}}
+            \def\xLongleftarrowfill@{\arrowfill@\Longleftarrow\Relbar\Relbar}
+            \newcommand{\xLongleftarrow}[2][]{\ext@arrow 0099\xLongleftarrowfill@{#1}{#2}}
+            \def\xLongrightarrowfill@{\arrowfill@\Relbar\Relbar\Longrightarrow}
+            \newcommand{\xLongrightarrow}[2][]{\ext@arrow 0099\xLongrightarrowfill@{#1}{#2}}
+            \def\xlongleftarrowfill@{\arrowfill@\longleftarrow\relbar\relbar}
+            \newcommand{\xlongleftarrow}[2][]{\ext@arrow 0099\xlongleftarrowfill@{#1}{#2}}
+            \def\xlongrightarrowfill@{\arrowfill@\relbar\relbar\longrightarrow}
+            \newcommand{\xlongrightarrow}[2][]{\ext@arrow 0099\xlongrightarrowfill@{#1}{#2}}
+            \makeatother
+            \begin{document}
+            \begin{large}
+        """
         fp.write(start_template)
         equation = add_text_for_zh_formula(equation)
         equation = get_align_equation(equation)