result.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os, sys
  15. from typing import Tuple
  16. import cv2
  17. import PIL
  18. import math
  19. import random
  20. import tempfile
  21. import subprocess
  22. import numpy as np
  23. from pathlib import Path
  24. from PIL import Image, ImageDraw, ImageFont
  25. from ...common.result import BaseCVResult
  26. from ....utils import logging
  27. from ....utils.fonts import PINGFANG_FONT_FILE_PATH
  28. from ...models_new.formula_recognition.result import (
  29. get_align_equation,
  30. generate_tex_file,
  31. generate_pdf_file,
  32. env_valid,
  33. pdf2img,
  34. create_font,
  35. crop_white_area,
  36. draw_box_txt_fine,
  37. )
  38. class FormulaRecognitionResult(dict):
  39. """Layout Parsing Result"""
  40. def __init__(self, data) -> None:
  41. """Initializes a new instance of the class with the specified data."""
  42. super().__init__(data)
  43. def save_to_img(self, save_path: str) -> None:
  44. """
  45. Saves an image with overlaid formula recognition results.
  46. This function attempts to save an image with recognized formulas highlighted
  47. and annotated. It verifies the environment setup before proceeding and logs
  48. a warning if the necessary rendering engine is not installed. The output image
  49. consists of two halves: the left side shows the original image with bounding
  50. boxes, and the right side shows the recognized formulas.
  51. Args:
  52. save_path (str): The directory path where the output image will be saved.
  53. Returns:
  54. None
  55. """
  56. try:
  57. env_valid()
  58. except subprocess.CalledProcessError as e:
  59. logging.warning(
  60. "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
  61. )
  62. return None
  63. if not os.path.exists(save_path):
  64. os.makedirs(save_path)
  65. img_id = self["img_id"]
  66. img_name = self["img_name"]
  67. if len(self["layout_det_res"]) <= 0:
  68. return
  69. image = Image.fromarray(self["layout_det_res"]["input_img"])
  70. h, w = image.height, image.width
  71. img_left = image.copy()
  72. img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
  73. random.seed(0)
  74. draw_left = ImageDraw.Draw(img_left)
  75. formula_save_path = os.path.join(save_path, "formula_img_{}.jpg".format(img_id))
  76. formula_res_list = self["formula_res_list"]
  77. for tno in range(len(self["formula_res_list"])):
  78. formula_res = self["formula_res_list"][tno]
  79. formula_region_id = formula_res["formula_region_id"]
  80. formula = str(formula_res["rec_formula"])
  81. dt_polys = formula_res["dt_polys"]
  82. x1, y1, x2, y2 = list(dt_polys)
  83. try:
  84. color = (
  85. random.randint(0, 255),
  86. random.randint(0, 255),
  87. random.randint(0, 255),
  88. )
  89. box = [x1, y1, x2, y1, x2, y2, x1, y2]
  90. box = np.array(box).reshape([-1, 2])
  91. pts = [(x, y) for x, y in box.tolist()]
  92. draw_left.polygon(pts, outline=color, width=8)
  93. draw_left.polygon(box, fill=color)
  94. img_right_text = draw_box_formula_fine(
  95. (w, h),
  96. box,
  97. formula,
  98. is_debug=False,
  99. )
  100. pts = np.array(box, np.int32).reshape((-1, 1, 2))
  101. cv2.polylines(img_right_text, [pts], True, color, 1)
  102. img_right = cv2.bitwise_and(img_right, img_right_text)
  103. except subprocess.CalledProcessError as e:
  104. logging.warning("Syntax error detected in formula, rendering failed.")
  105. continue
  106. img_left = Image.blend(image, img_left, 0.5)
  107. img_show = Image.new("RGB", (int(w * 2), h), (255, 255, 255))
  108. img_show.paste(img_left, (0, 0, w, h))
  109. img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
  110. img_show.save(formula_save_path)
  111. def save_results(self, save_path: str) -> None:
  112. """Save the formula recognition results to the specified directory.
  113. Args:
  114. save_path (str): The directory path to save the results.
  115. """
  116. if not os.path.exists(save_path):
  117. os.makedirs(save_path)
  118. if not os.path.isdir(save_path):
  119. return
  120. img_id = self["img_id"]
  121. layout_det_res = self["layout_det_res"]
  122. if len(layout_det_res) > 0:
  123. save_img_path = Path(save_path) / f"layout_det_result_img{img_id}.jpg"
  124. layout_det_res.save_to_img(save_img_path)
  125. self.save_to_img(save_path)
  126. input_params = self["input_params"]
  127. if input_params["use_doc_preprocessor"]:
  128. save_img_path = Path(save_path) / f"doc_preprocessor_result_img{img_id}.jpg"
  129. self["doc_preprocessor_res"].save_to_img(save_img_path)
  130. for tno in range(len(self["formula_res_list"])):
  131. formula_res = self["formula_res_list"][tno]
  132. formula_region_id = formula_res["formula_region_id"]
  133. save_img_path = (
  134. Path(save_path)
  135. / f"formula_res_img{img_id}_region{formula_region_id}.jpg"
  136. )
  137. formula_res.save_to_img(save_img_path)
  138. return
  139. def draw_box_formula_fine(
  140. img_size: Tuple[int, int], box: np.ndarray, formula: str, is_debug: bool = False
  141. ) -> np.ndarray:
  142. """draw box formula for pipeline"""
  143. """
  144. Draw box formula for pipeline.
  145. This function generates a LaTeX formula image and transforms it to fit
  146. within a specified bounding box on a larger image. If the rendering fails,
  147. it will write "Rendering Failed" inside the box.
  148. Args:
  149. img_size (Tuple[int, int]): The size of the image (width, height).
  150. box (np.ndarray): A numpy array representing the four corners of the bounding box.
  151. formula (str): The LaTeX formula to render.
  152. is_debug (bool, optional): If True, enables debug mode. Defaults to False.
  153. Returns:
  154. np.ndarray: An image array with the rendered formula inside the specified box.
  155. """
  156. box_height = int(
  157. math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
  158. )
  159. box_width = int(
  160. math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
  161. )
  162. with tempfile.TemporaryDirectory() as td:
  163. tex_file_path = os.path.join(td, "temp.tex")
  164. pdf_file_path = os.path.join(td, "temp.pdf")
  165. img_file_path = os.path.join(td, "temp.jpg")
  166. generate_tex_file(tex_file_path, formula)
  167. if os.path.exists(tex_file_path):
  168. generate_pdf_file(tex_file_path, td, is_debug)
  169. formula_img = None
  170. if os.path.exists(pdf_file_path):
  171. formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
  172. if formula_img is not None:
  173. formula_h, formula_w = formula_img.shape[:-1]
  174. resize_height = box_height
  175. resize_width = formula_w * resize_height / formula_h
  176. formula_img = cv2.resize(
  177. formula_img, (int(resize_width), int(resize_height))
  178. )
  179. formula_h, formula_w = formula_img.shape[:-1]
  180. pts1 = np.float32(
  181. [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
  182. )
  183. pts2 = np.array(box, dtype=np.float32)
  184. M = cv2.getPerspectiveTransform(pts1, pts2)
  185. formula_img = np.array(formula_img, dtype=np.uint8)
  186. img_right_text = cv2.warpPerspective(
  187. formula_img,
  188. M,
  189. img_size,
  190. flags=cv2.INTER_NEAREST,
  191. borderMode=cv2.BORDER_CONSTANT,
  192. borderValue=(255, 255, 255),
  193. )
  194. else:
  195. img_right_text = draw_box_txt_fine(
  196. img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
  197. )
  198. return img_right_text