result.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os, sys
  15. from typing import Tuple, List, Dict, Any
  16. import cv2
  17. import PIL
  18. import math
  19. import copy
  20. import random
  21. import tempfile
  22. import subprocess
  23. import numpy as np
  24. from pathlib import Path
  25. import PIL
  26. from PIL import Image, ImageDraw, ImageFont
  27. from ...common.result import BaseCVResult, JsonMixin, ImgMixin, StrMixin
  28. from ....utils import logging
  29. from ....utils.fonts import PINGFANG_FONT_FILE_PATH
  30. from ...models.formula_recognition.result import (
  31. get_align_equation,
  32. generate_tex_file,
  33. generate_pdf_file,
  34. env_valid,
  35. pdf2img,
  36. create_font,
  37. crop_white_area,
  38. draw_box_txt_fine,
  39. draw_formula_module,
  40. )
  41. class FormulaRecognitionResult(BaseCVResult):
  42. """Formula Recognition Result"""
  43. def _get_input_fn(self):
  44. fn = super()._get_input_fn()
  45. if (page_idx := self["page_index"]) is not None:
  46. fp = Path(fn)
  47. stem, suffix = fp.stem, fp.suffix
  48. return f"{stem}_{page_idx}{suffix}"
  49. else:
  50. return fn
  51. def _to_img(self) -> Dict[str, Image.Image]:
  52. """
  53. Converts the internal data to a PIL Image with detection and recognition results.
  54. Returns:
  55. Dict[str, Image.Image]: An image with detection boxes, texts, and scores blended on it.
  56. """
  57. image = Image.fromarray(self["doc_preprocessor_res"]["output_img"])
  58. res_img_dict = {}
  59. model_settings = self["model_settings"]
  60. if model_settings["use_doc_preprocessor"]:
  61. res_img_dict.update(**self["doc_preprocessor_res"].img)
  62. layout_det_res = self["layout_det_res"]
  63. if len(layout_det_res) > 0:
  64. res_img_dict["layout_det_res"] = layout_det_res.img["res"]
  65. try:
  66. env_valid()
  67. except subprocess.CalledProcessError as e:
  68. logging.warning(
  69. "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
  70. )
  71. res_img_dict["formula_res_img"] = image
  72. return res_img_dict
  73. if len(layout_det_res) <= 0:
  74. image = np.array(image.convert("RGB"))
  75. rec_formula = self["formula_res_list"][0]["rec_formula"]
  76. xywh = crop_white_area(image)
  77. if xywh is not None:
  78. x, y, w, h = xywh
  79. image = image[y : y + h, x : x + w]
  80. image = Image.fromarray(image)
  81. image_width, image_height = image.size
  82. box = [
  83. [0, 0],
  84. [image_width, 0],
  85. [image_width, image_height],
  86. [0, image_height],
  87. ]
  88. try:
  89. img_formula = draw_formula_module(
  90. image.size, box, rec_formula, is_debug=False
  91. )
  92. img_formula = Image.fromarray(img_formula)
  93. render_width, render_height = img_formula.size
  94. resize_height = render_height
  95. resize_width = int(resize_height * image_width / image_height)
  96. image = image.resize((resize_width, resize_height), Image.LANCZOS)
  97. new_image_width = image.width + int(render_width) + 10
  98. new_image = Image.new(
  99. "RGB", (new_image_width, render_height), (255, 255, 255)
  100. )
  101. new_image.paste(image, (0, 0))
  102. new_image.paste(img_formula, (image.width + 10, 0))
  103. res_img_dict["formula_res_img"] = new_image
  104. return res_img_dict
  105. except subprocess.CalledProcessError as e:
  106. logging.warning("Syntax error detected in formula, rendering failed.")
  107. res_img_dict["formula_res_img"] = image
  108. return res_img_dict
  109. h, w = image.height, image.width
  110. img_left = image.copy()
  111. img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
  112. random.seed(0)
  113. draw_left = ImageDraw.Draw(img_left)
  114. formula_res_list = self["formula_res_list"]
  115. for tno in range(len(self["formula_res_list"])):
  116. formula_res = self["formula_res_list"][tno]
  117. formula_region_id = formula_res["formula_region_id"]
  118. formula = str(formula_res["rec_formula"])
  119. dt_polys = formula_res["dt_polys"]
  120. x1, y1, x2, y2 = list(dt_polys)
  121. try:
  122. color = (
  123. random.randint(0, 255),
  124. random.randint(0, 255),
  125. random.randint(0, 255),
  126. )
  127. box = [x1, y1, x2, y1, x2, y2, x1, y2]
  128. box = np.array(box).reshape([-1, 2])
  129. pts = [(x, y) for x, y in box.tolist()]
  130. draw_left.polygon(pts, outline=color, width=8)
  131. draw_left.polygon(box, fill=color)
  132. img_right_text = draw_box_formula_fine(
  133. (w, h),
  134. box,
  135. formula,
  136. is_debug=False,
  137. )
  138. pts = np.array(box, np.int32).reshape((-1, 1, 2))
  139. cv2.polylines(img_right_text, [pts], True, color, 1)
  140. img_right = cv2.bitwise_and(img_right, img_right_text)
  141. except subprocess.CalledProcessError as e:
  142. logging.warning("Syntax error detected in formula, rendering failed.")
  143. continue
  144. img_left = Image.blend(image, img_left, 0.5)
  145. img_show = Image.new("RGB", (int(w * 2), h), (255, 255, 255))
  146. img_show.paste(img_left, (0, 0, w, h))
  147. img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
  148. res_img_dict["formula_res_img"] = img_show
  149. return res_img_dict
  150. def _to_str(self, *args, **kwargs) -> Dict[str, str]:
  151. """Converts the instance's attributes to a dictionary and then to a string.
  152. Args:
  153. *args: Additional positional arguments passed to the base class method.
  154. **kwargs: Additional keyword arguments passed to the base class method.
  155. Returns:
  156. Dict[str, str]: A dictionary with the instance's attributes converted to strings.
  157. """
  158. data = {}
  159. data["input_path"] = self["input_path"]
  160. data["page_index"] = self["page_index"]
  161. data["model_settings"] = self["model_settings"]
  162. if self["model_settings"]["use_doc_preprocessor"]:
  163. data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
  164. if len(self["layout_det_res"]) > 0:
  165. data["layout_det_res"] = self["layout_det_res"].str["res"]
  166. data["formula_res_list"] = []
  167. for tno in range(len(self["formula_res_list"])):
  168. rec_formula_dict = {
  169. "rec_formula": self["formula_res_list"][tno]["rec_formula"],
  170. "formula_region_id": self["formula_res_list"][tno]["formula_region_id"],
  171. }
  172. if "dt_polys" in self["formula_res_list"][tno]:
  173. rec_formula_dict["dt_polys"] = (
  174. self["formula_res_list"][tno]["dt_polys"],
  175. )
  176. data["formula_res_list"].append(rec_formula_dict)
  177. return JsonMixin._to_str(data, *args, **kwargs)
  178. def _to_json(self, *args, **kwargs) -> Dict[str, str]:
  179. """
  180. Converts the object's data to a JSON dictionary.
  181. Args:
  182. *args: Positional arguments passed to the JsonMixin._to_json method.
  183. **kwargs: Keyword arguments passed to the JsonMixin._to_json method.
  184. Returns:
  185. Dict[str, str]: A dictionary containing the object's data in JSON format.
  186. """
  187. data = {}
  188. data["input_path"] = self["input_path"]
  189. data["page_index"] = str(self["page_index"])
  190. data["model_settings"] = self["model_settings"]
  191. if self["model_settings"]["use_doc_preprocessor"]:
  192. data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
  193. if len(self["layout_det_res"]) > 0:
  194. data["layout_det_res"] = self["layout_det_res"].str["res"]
  195. data["formula_res_list"] = []
  196. for tno in range(len(self["formula_res_list"])):
  197. rec_formula_dict = {
  198. "rec_formula": self["formula_res_list"][tno]["rec_formula"],
  199. "formula_region_id": self["formula_res_list"][tno]["formula_region_id"],
  200. }
  201. if "dt_polys" in self["formula_res_list"][tno]:
  202. rec_formula_dict["dt_polys"] = (
  203. self["formula_res_list"][tno]["dt_polys"],
  204. )
  205. data["formula_res_list"].append(rec_formula_dict)
  206. return JsonMixin._to_json(data, *args, **kwargs)
  207. def draw_box_formula_fine(
  208. img_size: Tuple[int, int], box: np.ndarray, formula: str, is_debug: bool = False
  209. ) -> np.ndarray:
  210. """draw box formula for pipeline"""
  211. """
  212. Draw box formula for pipeline.
  213. This function generates a LaTeX formula image and transforms it to fit
  214. within a specified bounding box on a larger image. If the rendering fails,
  215. it will write "Rendering Failed" inside the box.
  216. Args:
  217. img_size (Tuple[int, int]): The size of the image (width, height).
  218. box (np.ndarray): A numpy array representing the four corners of the bounding box.
  219. formula (str): The LaTeX formula to render.
  220. is_debug (bool, optional): If True, enables debug mode. Defaults to False.
  221. Returns:
  222. np.ndarray: An image array with the rendered formula inside the specified box.
  223. """
  224. box_height = int(
  225. math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
  226. )
  227. box_width = int(
  228. math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
  229. )
  230. with tempfile.TemporaryDirectory() as td:
  231. tex_file_path = os.path.join(td, "temp.tex")
  232. pdf_file_path = os.path.join(td, "temp.pdf")
  233. img_file_path = os.path.join(td, "temp.jpg")
  234. generate_tex_file(tex_file_path, formula)
  235. if os.path.exists(tex_file_path):
  236. generate_pdf_file(tex_file_path, td, is_debug)
  237. formula_img = None
  238. if os.path.exists(pdf_file_path):
  239. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  240. if formula_img is not None:
  241. formula_h, formula_w = formula_img.shape[:-1]
  242. resize_height = box_height
  243. resize_width = formula_w * resize_height / formula_h
  244. formula_img = cv2.resize(
  245. formula_img, (int(resize_width), int(resize_height))
  246. )
  247. formula_h, formula_w = formula_img.shape[:-1]
  248. pts1 = np.float32(
  249. [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
  250. )
  251. pts2 = np.array(box, dtype=np.float32)
  252. M = cv2.getPerspectiveTransform(pts1, pts2)
  253. formula_img = np.array(formula_img, dtype=np.uint8)
  254. img_right_text = cv2.warpPerspective(
  255. formula_img,
  256. M,
  257. img_size,
  258. flags=cv2.INTER_NEAREST,
  259. borderMode=cv2.BORDER_CONSTANT,
  260. borderValue=(255, 255, 255),
  261. )
  262. else:
  263. img_right_text = draw_box_txt_fine(
  264. img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
  265. )
  266. return img_right_text