formula_rec.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import sys
  16. import cv2
  17. import math
  18. import random
  19. import tempfile
  20. import subprocess
  21. import numpy as np
  22. from pathlib import Path
  23. from PIL import Image, ImageDraw
  24. from .base import BaseResult, CVResult
  25. from ...utils import logging
  26. from .ocr import draw_box_txt_fine
  27. from ...utils.fonts import PINGFANG_FONT_FILE_PATH
  28. class FormulaRecResult(CVResult):
  29. def _to_str(self, *args, **kwargs):
  30. return super()._to_str(*args, **kwargs).replace("\\\\", "\\")
  31. def _to_img(
  32. self,
  33. ):
  34. """Draw formula on image"""
  35. try:
  36. env_valid()
  37. except subprocess.CalledProcessError as e:
  38. logging.warning(
  39. "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
  40. )
  41. return None
  42. image = self._img_reader.read(self["input_path"])
  43. rec_formula = str(self["rec_text"])
  44. image = np.array(image.convert("RGB"))
  45. xywh = crop_white_area(image)
  46. if xywh is not None:
  47. x, y, w, h = xywh
  48. image = image[y : y + h, x : x + w]
  49. image = Image.fromarray(image)
  50. image_width, image_height = image.size
  51. box = [[0, 0], [image_width, 0], [image_width, image_height], [0, image_height]]
  52. try:
  53. img_formula = draw_formula_module(
  54. image.size, box, rec_formula, is_debug=False
  55. )
  56. img_formula = Image.fromarray(img_formula)
  57. render_width, render_height = img_formula.size
  58. resize_height = render_height
  59. resize_width = int(resize_height * image_width / image_height)
  60. image = image.resize((resize_width, resize_height), Image.LANCZOS)
  61. new_image_width = image.width + int(render_width) + 10
  62. new_image = Image.new(
  63. "RGB", (new_image_width, render_height), (255, 255, 255)
  64. )
  65. new_image.paste(image, (0, 0))
  66. new_image.paste(img_formula, (image.width + 10, 0))
  67. return new_image
  68. except subprocess.CalledProcessError as e:
  69. logging.warning("Syntax error detected in formula, rendering failed.")
  70. return None
  71. class FormulaResult(CVResult):
  72. def _to_str(self, *args, **kwargs):
  73. return super()._to_str(*args, **kwargs).replace("\\\\", "\\")
  74. def _to_img(
  75. self,
  76. ):
  77. """draw formula result"""
  78. try:
  79. env_valid()
  80. except subprocess.CalledProcessError as e:
  81. logging.warning(
  82. "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
  83. )
  84. return None
  85. boxes = self["dt_polys"]
  86. formulas = self["rec_formula"]
  87. image = self._img_reader.read(self["input_path"])
  88. h, w = image.height, image.width
  89. img_left = image.copy()
  90. img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
  91. random.seed(0)
  92. draw_left = ImageDraw.Draw(img_left)
  93. if formulas is None or len(formulas) != len(boxes):
  94. formulas = [None] * len(boxes)
  95. for idx, (box, formula) in enumerate(zip(boxes, formulas)):
  96. try:
  97. color = (
  98. random.randint(0, 255),
  99. random.randint(0, 255),
  100. random.randint(0, 255),
  101. )
  102. box = np.array(box)
  103. pts = [(x, y) for x, y in box.tolist()]
  104. draw_left.polygon(pts, outline=color, width=8)
  105. draw_left.polygon(box, fill=color)
  106. img_right_text = draw_box_formula_fine(
  107. (w, h),
  108. box,
  109. formula,
  110. is_debug=False,
  111. )
  112. pts = np.array(box, np.int32).reshape((-1, 1, 2))
  113. cv2.polylines(img_right_text, [pts], True, color, 1)
  114. img_right = cv2.bitwise_and(img_right, img_right_text)
  115. except subprocess.CalledProcessError as e:
  116. logging.warning("Syntax error detected in formula, rendering failed.")
  117. continue
  118. img_left = Image.blend(image, img_left, 0.5)
  119. img_show = Image.new("RGB", (int(w * 2), h), (255, 255, 255))
  120. img_show.paste(img_left, (0, 0, w, h))
  121. img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
  122. return img_show
  123. class FormulaVisualResult(BaseResult):
  124. def __init__(self, data, page_id=None, src_input_name=None):
  125. super().__init__(data)
  126. self.page_id = page_id
  127. self.src_input_name = src_input_name
  128. def _to_str(self, *args, **kwargs):
  129. return super()._to_str(*args, **kwargs).replace("\\\\", "\\")
  130. def get_target_name(self, save_path):
  131. if self.src_input_name.endswith(".pdf"):
  132. save_path = (
  133. Path(save_path)
  134. / f"{Path(self.src_input_name).stem}_pdf"
  135. / Path("page_{:04d}".format(self.page_id + 1))
  136. )
  137. else:
  138. save_path = Path(save_path) / f"{Path(self.src_input_name).stem}"
  139. return save_path
  140. def save_to_json(self, save_path):
  141. if not save_path.lower().endswith(("json")):
  142. save_path = self.get_target_name(save_path)
  143. else:
  144. save_path = Path(save_path).stem
  145. formula_save_path = f"{save_path}_formula.jpg"
  146. if not str(save_path).endswith(".json"):
  147. save_path = "{}.json".format(save_path)
  148. super().save_to_json(save_path)
  149. def save_to_img(self, save_path):
  150. if not save_path.lower().endswith((".jpg", ".png")):
  151. save_path = self.get_target_name(save_path)
  152. else:
  153. save_path = Path(save_path).stem
  154. formula_save_path = f"{save_path}_formula.jpg"
  155. formula_result = self["formula_result"]
  156. if formula_result:
  157. formula_result.save_to_img(formula_save_path)
  158. def get_align_equation(equation):
  159. is_align = False
  160. equation = str(equation) + "\n"
  161. begin_dict = [
  162. r"begin{align}",
  163. r"begin{align*}",
  164. ]
  165. for begin_sym in begin_dict:
  166. if begin_sym in equation:
  167. is_align = True
  168. break
  169. if not is_align:
  170. equation = (
  171. r"\begin{equation}"
  172. + "\n"
  173. + equation.strip()
  174. + r"\nonumber"
  175. + "\n"
  176. + r"\end{equation}"
  177. + "\n"
  178. )
  179. return equation
  180. def generate_tex_file(tex_file_path, equation):
  181. with open(tex_file_path, "w") as fp:
  182. start_template = (
  183. r"\documentclass{article}" + "\n"
  184. r"\usepackage{cite}" + "\n"
  185. r"\usepackage{amsmath,amssymb,amsfonts}" + "\n"
  186. r"\usepackage{graphicx}" + "\n"
  187. r"\usepackage{textcomp}" + "\n"
  188. r"\DeclareMathSizes{14}{14}{9.8}{7}" + "\n"
  189. r"\pagestyle{empty}" + "\n"
  190. r"\begin{document}" + "\n"
  191. r"\begin{large}" + "\n"
  192. )
  193. fp.write(start_template)
  194. equation = get_align_equation(equation)
  195. fp.write(equation)
  196. end_template = r"\end{large}" + "\n" r"\end{document}" + "\n"
  197. fp.write(end_template)
  198. def generate_pdf_file(tex_path, pdf_dir, is_debug=False):
  199. if os.path.exists(tex_path):
  200. command = "pdflatex -halt-on-error -output-directory={} {}".format(
  201. pdf_dir, tex_path
  202. )
  203. if is_debug:
  204. subprocess.check_call(command, shell=True)
  205. else:
  206. devNull = open(os.devnull, "w")
  207. subprocess.check_call(
  208. command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
  209. )
  210. def crop_white_area(image):
  211. image = np.array(image).astype("uint8")
  212. gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  213. _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
  214. contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  215. if len(contours) > 0:
  216. x, y, w, h = cv2.boundingRect(np.concatenate(contours))
  217. return [x, y, w, h]
  218. else:
  219. return None
  220. def pdf2img(pdf_path, img_path, is_padding=False):
  221. import fitz
  222. pdfDoc = fitz.open(pdf_path)
  223. if pdfDoc.page_count != 1:
  224. return None
  225. for pg in range(pdfDoc.page_count):
  226. page = pdfDoc[pg]
  227. rotate = int(0)
  228. zoom_x = 2
  229. zoom_y = 2
  230. mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
  231. pix = page.get_pixmap(matrix=mat, alpha=False)
  232. if not os.path.exists(img_path):
  233. os.makedirs(img_path)
  234. pix._writeIMG(img_path, 7, 100)
  235. img = cv2.imread(img_path)
  236. xywh = crop_white_area(img)
  237. if xywh is not None:
  238. x, y, w, h = xywh
  239. img = img[y : y + h, x : x + w]
  240. if is_padding:
  241. img = cv2.copyMakeBorder(
  242. img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
  243. )
  244. return img
  245. return None
  246. def draw_formula_module(img_size, box, formula, is_debug=False):
  247. """draw box formula for module"""
  248. box_width, box_height = img_size
  249. with tempfile.TemporaryDirectory() as td:
  250. tex_file_path = os.path.join(td, "temp.tex")
  251. pdf_file_path = os.path.join(td, "temp.pdf")
  252. img_file_path = os.path.join(td, "temp.jpg")
  253. generate_tex_file(tex_file_path, formula)
  254. if os.path.exists(tex_file_path):
  255. generate_pdf_file(tex_file_path, td, is_debug)
  256. formula_img = None
  257. if os.path.exists(pdf_file_path):
  258. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  259. if formula_img is not None:
  260. return formula_img
  261. else:
  262. img_right_text = draw_box_txt_fine(
  263. img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
  264. )
  265. return img_right_text
  266. def env_valid():
  267. with tempfile.TemporaryDirectory() as td:
  268. tex_file_path = os.path.join(td, "temp.tex")
  269. pdf_file_path = os.path.join(td, "temp.pdf")
  270. img_file_path = os.path.join(td, "temp.jpg")
  271. formula = "a+b=c"
  272. is_debug = False
  273. generate_tex_file(tex_file_path, formula)
  274. if os.path.exists(tex_file_path):
  275. generate_pdf_file(tex_file_path, td, is_debug)
  276. if os.path.exists(pdf_file_path):
  277. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  278. def draw_box_formula_fine(img_size, box, formula, is_debug=False):
  279. """draw box formula for pipeline"""
  280. box_height = int(
  281. math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
  282. )
  283. box_width = int(
  284. math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
  285. )
  286. with tempfile.TemporaryDirectory() as td:
  287. tex_file_path = os.path.join(td, "temp.tex")
  288. pdf_file_path = os.path.join(td, "temp.pdf")
  289. img_file_path = os.path.join(td, "temp.jpg")
  290. generate_tex_file(tex_file_path, formula)
  291. if os.path.exists(tex_file_path):
  292. generate_pdf_file(tex_file_path, td, is_debug)
  293. formula_img = None
  294. if os.path.exists(pdf_file_path):
  295. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  296. if formula_img is not None:
  297. formula_h, formula_w = formula_img.shape[:-1]
  298. resize_height = box_height
  299. resize_width = formula_w * resize_height / formula_h
  300. formula_img = cv2.resize(
  301. formula_img, (int(resize_width), int(resize_height))
  302. )
  303. formula_h, formula_w = formula_img.shape[:-1]
  304. pts1 = np.float32(
  305. [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
  306. )
  307. pts2 = np.array(box, dtype=np.float32)
  308. M = cv2.getPerspectiveTransform(pts1, pts2)
  309. formula_img = np.array(formula_img, dtype=np.uint8)
  310. img_right_text = cv2.warpPerspective(
  311. formula_img,
  312. M,
  313. img_size,
  314. flags=cv2.INTER_NEAREST,
  315. borderMode=cv2.BORDER_CONSTANT,
  316. borderValue=(255, 255, 255),
  317. )
  318. else:
  319. img_right_text = draw_box_txt_fine(
  320. img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
  321. )
  322. return img_right_text