| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import math
- import os
- import random
- import subprocess
- import tempfile
- from typing import Dict, Tuple
- import numpy as np
- from PIL import Image, ImageDraw
- from ....utils import logging
- from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
- from ....utils.fonts import PINGFANG_FONT
- from ...common.result import BaseCVResult, JsonMixin
- from ...models.formula_recognition.result import (
- crop_white_area,
- draw_box_txt_fine,
- draw_formula_module,
- env_valid,
- generate_pdf_file,
- generate_tex_file,
- pdf2img,
- )
- if is_dep_available("opencv-contrib-python"):
- import cv2
@class_requires_deps("opencv-contrib-python")
class FormulaRecognitionResult(BaseCVResult):
    """Formula Recognition Result.

    Exposes three views of a formula recognition pipeline result:
    a visualisation (``_to_img``), a printable string (``_to_str``) and a
    JSON-ready dictionary (``_to_json``).
    """

    def _to_img(self) -> Dict[str, Image.Image]:
        """
        Converts the internal data to PIL images with detection and recognition results.

        Returns:
            Dict[str, Image.Image]: Visualisation images keyed by name. The key
                ``"formula_res_img"`` is always present; the document
                preprocessor images and ``"layout_det_res"`` are added when the
                corresponding results are available.
        """
        # The channel axis is reversed before handing the array to PIL
        # (presumably BGR -> RGB; confirm against the preprocessor output).
        image = Image.fromarray(self["doc_preprocessor_res"]["output_img"][:, :, ::-1])
        res_img_dict = {}
        if self["model_settings"]["use_doc_preprocessor"]:
            res_img_dict.update(**self["doc_preprocessor_res"].img)

        layout_det_res = self["layout_det_res"]
        if len(layout_det_res) > 0:
            res_img_dict["layout_det_res"] = layout_det_res.img["res"]

        # Without a working LaTeX environment nothing can be rendered, so
        # fall back to the plain input image.
        try:
            env_valid()
        except subprocess.CalledProcessError:
            logging.warning(
                "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
            )
            res_img_dict["formula_res_img"] = image
            return res_img_dict

        if len(layout_det_res) <= 0:
            res_img_dict["formula_res_img"] = self._draw_single_formula(image)
        else:
            res_img_dict["formula_res_img"] = self._draw_formula_regions(image)
        return res_img_dict

    def _draw_single_formula(self, image: Image.Image) -> Image.Image:
        """Render the whole-image case: input on the left, rendered formula on the right.

        Args:
            image (Image.Image): The input image.

        Returns:
            Image.Image: The side-by-side image, or the (cropped) input image
                when there is no formula result or rendering fails.
        """
        formula_res_list = self["formula_res_list"]
        if len(formula_res_list) == 0:
            # Nothing was recognised; show the input unchanged instead of
            # failing on the [0] lookup below.
            return image
        rec_formula = formula_res_list[0]["rec_formula"]

        pixels = np.array(image.convert("RGB"))
        xywh = crop_white_area(pixels)
        if xywh is not None:
            x, y, w, h = xywh
            pixels = pixels[y : y + h, x : x + w]
        image = Image.fromarray(pixels)
        image_width, image_height = image.size
        box = [
            [0, 0],
            [image_width, 0],
            [image_width, image_height],
            [0, image_height],
        ]
        try:
            img_formula = draw_formula_module(
                image.size, box, rec_formula, is_debug=False
            )
        except subprocess.CalledProcessError:
            logging.warning("Syntax error detected in formula, rendering failed.")
            return image

        img_formula = Image.fromarray(img_formula)
        render_width, render_height = img_formula.size
        # Scale the input to the rendered formula's height, keeping its aspect ratio.
        resize_height = render_height
        resize_width = int(resize_height * image_width / image_height)
        image = image.resize((resize_width, resize_height), Image.LANCZOS)
        # 10 px white gap between the two halves.
        new_image_width = image.width + int(render_width) + 10
        new_image = Image.new("RGB", (new_image_width, render_height), (255, 255, 255))
        new_image.paste(image, (0, 0))
        new_image.paste(img_formula, (image.width + 10, 0))
        return new_image

    def _draw_formula_regions(self, image: Image.Image) -> Image.Image:
        """Render the layout case: highlighted regions on the left, formulas on the right.

        Args:
            image (Image.Image): The input image.

        Returns:
            Image.Image: An image twice as wide as the input; the left half is
                the input blended with coloured region boxes, the right half a
                white canvas with each formula rendered inside its box.
        """
        h, w = image.height, image.width
        img_left = image.copy()
        img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
        # A private generator seeded with 0 yields the same colour sequence
        # as seeding the module-level RNG, without touching global state.
        rng = random.Random(0)
        draw_left = ImageDraw.Draw(img_left)
        for formula_res in self["formula_res_list"]:
            formula = str(formula_res["rec_formula"])
            x1, y1, x2, y2 = list(formula_res["dt_polys"])
            color = (
                rng.randint(0, 255),
                rng.randint(0, 255),
                rng.randint(0, 255),
            )
            box = np.array([x1, y1, x2, y1, x2, y2, x1, y2]).reshape([-1, 2])
            # PIL is given plain Python tuples for both calls; passing the
            # ndarray itself risks Pillow misreading its raw buffer.
            pts = [(x, y) for x, y in box.tolist()]
            draw_left.polygon(pts, outline=color, width=8)
            draw_left.polygon(pts, fill=color)
            try:
                img_right_text = draw_box_formula_fine(
                    (w, h),
                    box,
                    formula,
                    is_debug=False,
                )
            except subprocess.CalledProcessError:
                logging.warning("Syntax error detected in formula, rendering failed.")
                continue
            cv_pts = np.array(box, np.int32).reshape((-1, 1, 2))
            cv2.polylines(img_right_text, [cv_pts], True, color, 1)
            # Each rendering is white outside its box, so AND-ing accumulates
            # all formulas onto the shared white canvas.
            img_right = cv2.bitwise_and(img_right, img_right_text)

        img_left = Image.blend(image, img_left, 0.5)
        img_show = Image.new("RGB", (int(w * 2), h), (255, 255, 255))
        img_show.paste(img_left, (0, 0, w, h))
        img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
        return img_show

    def _formula_res_summary(self) -> list:
        """Build the per-formula entries shared by ``_to_str`` and ``_to_json``.

        Returns:
            list: One dict per formula with ``rec_formula``,
                ``formula_region_id`` and, when present, ``dt_polys``.
        """
        summary = []
        for formula_res in self["formula_res_list"]:
            entry = {
                "rec_formula": formula_res["rec_formula"],
                "formula_region_id": formula_res["formula_region_id"],
            }
            if "dt_polys" in formula_res:
                # NOTE(review): the value is wrapped in a 1-tuple. This looks
                # like an accidental trailing comma, but it defines the current
                # output shape, so it is kept for compatibility - confirm
                # before changing.
                entry["dt_polys"] = (formula_res["dt_polys"],)
            summary.append(entry)
        return summary

    def _export_data(self, page_index) -> dict:
        """Assemble the dictionary shared by ``_to_str`` and ``_to_json``.

        Args:
            page_index: The value to store under ``"page_index"``.

        Returns:
            dict: Input path, page index, model settings, optional sub-results
                and the formula result list, in that key order.
        """
        data = {
            "input_path": self["input_path"],
            "page_index": page_index,
            "model_settings": self["model_settings"],
        }
        if self["model_settings"]["use_doc_preprocessor"]:
            data["doc_preprocessor_res"] = self["doc_preprocessor_res"].str["res"]
        if len(self["layout_det_res"]) > 0:
            data["layout_det_res"] = self["layout_det_res"].str["res"]
        data["formula_res_list"] = self._formula_res_summary()
        return data

    def _to_str(self, *args, **kwargs) -> Dict[str, str]:
        """Converts the instance's attributes to a dictionary and then to a string.

        Args:
            *args: Additional positional arguments passed to the base class method.
            **kwargs: Additional keyword arguments passed to the base class method.

        Returns:
            Dict[str, str]: A dictionary with the instance's attributes converted to strings.
        """
        data = self._export_data(self["page_index"])
        return JsonMixin._to_str(data, *args, **kwargs)

    def _to_json(self, *args, **kwargs) -> Dict[str, str]:
        """
        Converts the object's data to a JSON dictionary.

        Args:
            *args: Positional arguments passed to the JsonMixin._to_json method.
            **kwargs: Keyword arguments passed to the JsonMixin._to_json method.

        Returns:
            Dict[str, str]: A dictionary containing the object's data in JSON format.
        """
        # Unlike _to_str, the page index is stringified here.
        # NOTE(review): the sub-results are read through their `.str` view
        # even for JSON output - confirm this is intended.
        data = self._export_data(str(self["page_index"]))
        return JsonMixin._to_json(data, *args, **kwargs)
@function_requires_deps("opencv-contrib-python")
def draw_box_formula_fine(
    img_size: Tuple[int, int], box: np.ndarray, formula: str, is_debug: bool = False
) -> np.ndarray:
    """
    Draw box formula for pipeline.

    This function generates a LaTeX formula image and transforms it to fit
    within a specified bounding box on a larger image. If the rendering fails,
    it will write "Rendering Failed" inside the box.

    Args:
        img_size (Tuple[int, int]): The size of the image (width, height).
        box (np.ndarray): A numpy array representing the four corners of the bounding box.
        formula (str): The LaTeX formula to render.
        is_debug (bool, optional): If True, enables debug mode. Defaults to False.

    Returns:
        np.ndarray: An image array with the rendered formula inside the specified box.
            A plain white canvas is returned when the box has zero height or width.
    """
    # Edge lengths of the box: corner 0 -> 3 is the height, corner 0 -> 1 the width.
    box_height = int(
        math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
    )
    box_width = int(
        math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
    )
    if box_height <= 0 or box_width <= 0:
        # A degenerate box cannot hold a formula: resizing to zero height and
        # the perspective transform would both fail, so skip rendering.
        return np.full((img_size[1], img_size[0], 3), 255, dtype=np.uint8)

    with tempfile.TemporaryDirectory() as td:
        tex_file_path = os.path.join(td, "temp.tex")
        pdf_file_path = os.path.join(td, "temp.pdf")
        img_file_path = os.path.join(td, "temp.jpg")
        generate_tex_file(tex_file_path, formula)
        if os.path.exists(tex_file_path):
            generate_pdf_file(tex_file_path, td, is_debug)
        formula_img = None
        if os.path.exists(pdf_file_path):
            formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
        if formula_img is not None:
            formula_h, formula_w = formula_img.shape[:-1]
            # Scale the rendering to the box height, keeping its aspect ratio;
            # the width is clamped so a very narrow rendering stays resizable.
            resize_height = box_height
            resize_width = max(1, int(formula_w * resize_height / formula_h))
            formula_img = cv2.resize(formula_img, (resize_width, resize_height))
            # Map the axis-aligned box-sized rectangle onto the target box.
            pts1 = np.float32(
                [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
            )
            pts2 = np.array(box, dtype=np.float32)
            M = cv2.getPerspectiveTransform(pts1, pts2)
            formula_img = np.array(formula_img, dtype=np.uint8)
            img_right_text = cv2.warpPerspective(
                formula_img,
                M,
                img_size,
                flags=cv2.INTER_NEAREST,
                borderMode=cv2.BORDER_CONSTANT,
                borderValue=(255, 255, 255),
            )
        else:
            img_right_text = draw_box_txt_fine(
                img_size, box, "Rendering Failed", PINGFANG_FONT.path
            )
    return img_right_text
|