result.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import copy
  15. import math
  16. import os
  17. import re
  18. import subprocess
  19. import tempfile
  20. from typing import List, Optional
  21. import numpy as np
  22. import PIL
  23. from PIL import Image, ImageDraw, ImageFont
  24. from ....utils import logging
  25. from ....utils.deps import function_requires_deps, is_dep_available
  26. from ....utils.file_interface import custom_open
  27. from ....utils.fonts import PINGFANG_FONT
  28. from ...common.result import BaseCVResult, JsonMixin
  29. if is_dep_available("opencv-contrib-python"):
  30. import cv2
  31. if is_dep_available("pypdfium2"):
  32. import pypdfium2 as pdfium
  33. class FormulaRecResult(BaseCVResult):
  34. def _to_str(self, *args, **kwargs):
  35. data = copy.deepcopy(self)
  36. data.pop("input_img")
  37. _str = JsonMixin._to_str(data, *args, **kwargs)["res"]
  38. return {"res": _str}
  39. def _to_json(self, *args, **kwargs):
  40. data = copy.deepcopy(self)
  41. data.pop("input_img")
  42. return JsonMixin._to_json(data, *args, **kwargs)
  43. def _to_img(
  44. self,
  45. ) -> Image.Image:
  46. """
  47. Draws a recognized formula on an image.
  48. This method processes an input image to recognize and render a LaTeX formula.
  49. It overlays the rendered formula onto the input image and returns the combined image.
  50. If the LaTeX rendering engine is not installed or a syntax error is detected,
  51. it logs a warning and returns the original image.
  52. Returns:
  53. Image.Image: An image with the recognized formula rendered alongside the original image.
  54. """
  55. image = Image.fromarray(self["input_img"])
  56. try:
  57. env_valid()
  58. except subprocess.CalledProcessError as e:
  59. logging.warning(
  60. "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
  61. )
  62. return {"res": image}
  63. rec_formula = str(self["rec_formula"])
  64. image = np.array(image.convert("RGB"))
  65. xywh = crop_white_area(image)
  66. if xywh is not None:
  67. x, y, w, h = xywh
  68. image = image[y : y + h, x : x + w]
  69. image = Image.fromarray(image)
  70. image_width, image_height = image.size
  71. box = [[0, 0], [image_width, 0], [image_width, image_height], [0, image_height]]
  72. try:
  73. img_formula = draw_formula_module(
  74. image.size, box, rec_formula, is_debug=False
  75. )
  76. img_formula = Image.fromarray(img_formula)
  77. render_width, render_height = img_formula.size
  78. resize_height = render_height
  79. resize_width = int(resize_height * image_width / image_height)
  80. image = image.resize((resize_width, resize_height), Image.LANCZOS)
  81. new_image_width = image.width + int(render_width) + 10
  82. new_image = Image.new(
  83. "RGB", (new_image_width, render_height), (255, 255, 255)
  84. )
  85. new_image.paste(image, (0, 0))
  86. new_image.paste(img_formula, (image.width + 10, 0))
  87. return {"res": new_image}
  88. except subprocess.CalledProcessError as e:
  89. logging.warning("Syntax error detected in formula, rendering failed.")
  90. return {"res": image}
  91. def get_align_equation(equation: str) -> str:
  92. """
  93. Wraps an equation in LaTeX environment tags if not already aligned.
  94. This function checks if a given LaTeX equation contains any alignment tags (`align` or `align*`).
  95. If the equation does not contain these tags, it wraps the equation in `equation` and `nonumber` tags.
  96. Args:
  97. equation (str): The LaTeX equation to be checked and potentially modified.
  98. Returns:
  99. str: The modified equation with appropriate LaTeX tags for alignment.
  100. """
  101. is_align = False
  102. equation = str(equation) + "\n"
  103. begin_dict = [
  104. r"begin{align}",
  105. r"begin{align*}",
  106. ]
  107. for begin_sym in begin_dict:
  108. if begin_sym in equation:
  109. is_align = True
  110. break
  111. if not is_align:
  112. equation = (
  113. r"\begin{equation}"
  114. + "\n"
  115. + equation.strip()
  116. + r"\nonumber"
  117. + "\n"
  118. + r"\end{equation}"
  119. + "\n"
  120. )
  121. return equation
  122. def add_text_for_zh_formula(formula: str) -> str:
  123. pattern = re.compile(r"([^\x00-\x7F]+)")
  124. def replacer(match):
  125. return f"\\text{{{match.group(1)}}}"
  126. replaced_formula = pattern.sub(replacer, formula)
  127. return replaced_formula
  128. def generate_tex_file(tex_file_path: str, equation: str) -> None:
  129. """
  130. Generates a LaTeX file containing a specific equation.
  131. This function creates a LaTeX file at the specified file path, writing the necessary
  132. LaTeX preamble and wrapping the provided equation in a document structure. The equation
  133. is processed to ensure it includes alignment tags if necessary.
  134. Args:
  135. tex_file_path (str): The file path where the LaTeX file will be saved.
  136. equation (str): The LaTeX equation to be written into the file.
  137. """
  138. with custom_open(tex_file_path, "w") as fp:
  139. start_template = r"""
  140. \documentclass[varwidth]{standalone}
  141. \usepackage{cite}
  142. \usepackage{amsmath,amssymb,amsfonts,upgreek}
  143. \usepackage{graphicx}
  144. \usepackage{textcomp}
  145. \usepackage{xeCJK}
  146. \DeclareMathSizes{14}{14}{9.8}{7}
  147. \pagestyle{empty}
  148. \makeatletter
  149. \def\x@arrow{\DOTSB\Relbar}
  150. \def\xlongequalsignfill@{\arrowfill@\x@arrow\Relbar\x@arrow}
  151. \newcommand{\xlongequal}[2][]{\ext@arrow 0099\xlongequalsignfill@{#1}{#2}}
  152. \def\xLongleftrightarrowfill@{\arrowfill@\Longleftarrow\Relbar\Longrightarrow}
  153. \newcommand{\xLongleftrightarrow}[2][]{\ext@arrow 0099\xLongleftrightarrowfill@{#1}{#2}}
  154. \def\xlongleftrightarrowfill@{\arrowfill@\longleftarrow\relbar\longrightarrow}
  155. \newcommand{\xlongleftrightarrow}[2][]{\ext@arrow 0099\xlongleftrightarrowfill@{#1}{#2}}
  156. \def\xLeftrightarrowfill@{\arrowfill@\Leftarrow\Relbar\Rightarrow}
  157. \newcommand{\xLeftrightarrow}[2][]{\ext@arrow 0099\xLeftrightarrowfill@{#1}{#2}}
  158. \def\xleftrightarrowfill@{\arrowfill@\leftarrow\relbar\rightarrow}
  159. \newcommand{\xleftrightarrow}[2][]{\ext@arrow 0099\xleftrightarrowfill@{#1}{#2}}
  160. \def\xLongleftarrowfill@{\arrowfill@\Longleftarrow\Relbar\Relbar}
  161. \newcommand{\xLongleftarrow}[2][]{\ext@arrow 0099\xLongleftarrowfill@{#1}{#2}}
  162. \def\xLongrightarrowfill@{\arrowfill@\Relbar\Relbar\Longrightarrow}
  163. \newcommand{\xLongrightarrow}[2][]{\ext@arrow 0099\xLongrightarrowfill@{#1}{#2}}
  164. \def\xlongleftarrowfill@{\arrowfill@\longleftarrow\relbar\relbar}
  165. \newcommand{\xlongleftarrow}[2][]{\ext@arrow 0099\xlongleftarrowfill@{#1}{#2}}
  166. \def\xlongrightarrowfill@{\arrowfill@\relbar\relbar\longrightarrow}
  167. \newcommand{\xlongrightarrow}[2][]{\ext@arrow 0099\xlongrightarrowfill@{#1}{#2}}
  168. \makeatother
  169. \begin{document}
  170. \begin{large}
  171. """
  172. fp.write(start_template)
  173. equation = add_text_for_zh_formula(equation)
  174. equation = get_align_equation(equation)
  175. fp.write(equation)
  176. end_template = r"\end{large}" + "\n" r"\end{document}" + "\n"
  177. fp.write(end_template)
  178. def generate_pdf_file(
  179. tex_path: str, pdf_dir: str, is_debug: bool = False
  180. ) -> Optional[bool]:
  181. """
  182. Generates a PDF file from a LaTeX file using pdflatex.
  183. This function checks if the specified LaTeX file exists, and then runs pdflatex to generate a PDF file
  184. in the specified directory. It can run in debug mode to show detailed output or in silent mode.
  185. Args:
  186. tex_path (str): The path to the LaTeX file.
  187. pdf_dir (str): The directory where the PDF file will be saved.
  188. is_debug (bool, optional): If True, runs pdflatex with detailed output. Defaults to False.
  189. Returns:
  190. Optional[bool]: Returns True if the PDF was generated successfully, False if the LaTeX file does not exist,
  191. and None if an error occurred during the pdflatex execution.
  192. """
  193. if os.path.exists(tex_path):
  194. command = "xelatex -interaction=nonstopmode -halt-on-error -output-directory={} {}".format(
  195. pdf_dir, tex_path
  196. )
  197. if is_debug:
  198. subprocess.check_call(command, shell=True)
  199. else:
  200. custom_open(os.devnull, "w")
  201. subprocess.check_call(
  202. command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
  203. )
  204. @function_requires_deps("opencv-contrib-python")
  205. def crop_white_area(image: np.ndarray) -> Optional[List[int]]:
  206. """
  207. Finds and returns the bounding box of the non-white area in an image.
  208. This function converts an image to grayscale and uses binary thresholding to
  209. find contours. It then calculates the bounding rectangle around the non-white
  210. areas of the image.
  211. Args:
  212. image (np.ndarray): The input image as a NumPy array.
  213. Returns:
  214. Optional[List[int]]: A list [x, y, w, h] representing the bounding box of
  215. the non-white area, or None if no such area is found.
  216. """
  217. image = np.array(image).astype("uint8")
  218. gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  219. _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
  220. contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  221. if len(contours) > 0:
  222. x, y, w, h = cv2.boundingRect(np.concatenate(contours))
  223. return [x, y, w, h]
  224. else:
  225. return None
  226. @function_requires_deps("pypdfium2", "opencv-contrib-python")
  227. def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
  228. """
  229. Converts a single-page PDF to an image, optionally cropping white areas and adding padding.
  230. Args:
  231. pdf_path (str): The path to the PDF file.
  232. img_path (str): The path where the image will be saved.
  233. is_padding (bool): If True, adds a 30-pixel white padding around the image.
  234. Returns:
  235. np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page.
  236. """
  237. pdfDoc = pdfium.PdfDocument(pdf_path)
  238. try:
  239. if len(pdfDoc) != 1:
  240. return None
  241. for page in pdfDoc:
  242. rotate = int(0)
  243. zoom = 2
  244. img = page.render(scale=zoom, rotation=rotate).to_pil()
  245. img = img.convert("RGB")
  246. img = np.array(img)
  247. img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  248. xywh = crop_white_area(img)
  249. if xywh is not None:
  250. x, y, w, h = xywh
  251. img = img[y : y + h, x : x + w]
  252. if is_padding:
  253. img = cv2.copyMakeBorder(
  254. img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
  255. )
  256. return img
  257. finally:
  258. pdfDoc.close()
  259. return None
  260. def draw_formula_module(
  261. img_size: tuple, box: list, formula: str, is_debug: bool = False
  262. ):
  263. """
  264. Draw box formula for module.
  265. Args:
  266. img_size (tuple): The size of the image as (width, height).
  267. box (list): The coordinates for the bounding box.
  268. formula (str): The LaTeX formula to render.
  269. is_debug (bool): If True, retains intermediate files for debugging purposes.
  270. Returns:
  271. np.ndarray: The resulting image with the formula or an error message.
  272. """
  273. box_width, box_height = img_size
  274. with tempfile.TemporaryDirectory() as td:
  275. tex_file_path = os.path.join(td, "temp.tex")
  276. pdf_file_path = os.path.join(td, "temp.pdf")
  277. img_file_path = os.path.join(td, "temp.jpg")
  278. generate_tex_file(tex_file_path, formula)
  279. if os.path.exists(tex_file_path):
  280. generate_pdf_file(tex_file_path, td, is_debug)
  281. formula_img = None
  282. if os.path.exists(pdf_file_path):
  283. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  284. if formula_img is not None:
  285. return formula_img
  286. else:
  287. img_right_text = draw_box_txt_fine(
  288. img_size, box, "Rendering Failed", PINGFANG_FONT.path
  289. )
  290. return img_right_text
  291. def env_valid() -> bool:
  292. """
  293. Validates if the environment is correctly set up to convert LaTeX formulas to images.
  294. Returns:
  295. bool: True if the environment is valid and the conversion is successful, False otherwise.
  296. """
  297. with tempfile.TemporaryDirectory() as td:
  298. tex_file_path = os.path.join(td, "temp.tex")
  299. pdf_file_path = os.path.join(td, "temp.pdf")
  300. img_file_path = os.path.join(td, "temp.jpg")
  301. formula = "a+b=c"
  302. is_debug = False
  303. generate_tex_file(tex_file_path, formula)
  304. if os.path.exists(tex_file_path):
  305. generate_pdf_file(tex_file_path, td, is_debug)
  306. if os.path.exists(pdf_file_path):
  307. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  308. @function_requires_deps("opencv-contrib-python")
  309. def draw_box_txt_fine(img_size: tuple, box: list, txt: str, font_path: str):
  310. """
  311. Draw box text.
  312. Args:
  313. img_size (tuple): Size of the image as (width, height).
  314. box (list): List of four points defining the box, each point is a tuple (x, y).
  315. txt (str): The text to draw inside the box.
  316. font_path (str): Path to the font file to be used for drawing text.
  317. Returns:
  318. np.ndarray: Image array with the text drawn and transformed to fit the box.
  319. """
  320. box_height = int(
  321. math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
  322. )
  323. box_width = int(
  324. math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
  325. )
  326. if box_height > 2 * box_width and box_height > 30:
  327. img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
  328. draw_text = ImageDraw.Draw(img_text)
  329. if txt:
  330. font = create_font(txt, (box_height, box_width), font_path)
  331. draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
  332. img_text = img_text.transpose(Image.ROTATE_270)
  333. else:
  334. img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
  335. draw_text = ImageDraw.Draw(img_text)
  336. if txt:
  337. font = create_font(txt, (box_width, box_height), font_path)
  338. draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
  339. pts1 = np.float32(
  340. [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
  341. )
  342. pts2 = np.array(box, dtype=np.float32)
  343. M = cv2.getPerspectiveTransform(pts1, pts2)
  344. img_text = np.array(img_text, dtype=np.uint8)
  345. img_right_text = cv2.warpPerspective(
  346. img_text,
  347. M,
  348. img_size,
  349. flags=cv2.INTER_NEAREST,
  350. borderMode=cv2.BORDER_CONSTANT,
  351. borderValue=(255, 255, 255),
  352. )
  353. return img_right_text
  354. def create_font(txt: str, sz: tuple, font_path: str) -> ImageFont.FreeTypeFont:
  355. """
  356. Creates a font object with a size that ensures the text fits within the specified dimensions.
  357. Args:
  358. txt (str): The text to fit.
  359. sz (tuple): The target size as (width, height).
  360. font_path (str): The path to the font file.
  361. Returns:
  362. ImageFont.FreeTypeFont: A PIL font object at the appropriate size.
  363. """
  364. font_size = int(sz[1] * 0.8)
  365. font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
  366. if int(PIL.__version__.split(".")[0]) < 10:
  367. length = font.getsize(txt)[0]
  368. else:
  369. length = font.getlength(txt)
  370. if length > sz[0]:
  371. font_size = int(font_size * sz[0] / length)
  372. font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
  373. return font