| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import sys
- import cv2
- import PIL
- import math
- import random
- import tempfile
- import subprocess
- import numpy as np
- from pathlib import Path
- from PIL import Image, ImageDraw, ImageFont
- from ...common.result import BaseCVResult
- from ....utils import logging
- from ....utils.fonts import PINGFANG_FONT_FILE_PATH
class FormulaRecResult(BaseCVResult):
    """Formula recognition result with string and image renderings."""

    def _to_str(self, *args, **kwargs):
        """Return the base-class string with doubled backslashes collapsed.

        Every occurrence of two consecutive backslashes in the string produced
        by the parent ``_to_str`` is replaced by a single backslash.
        NOTE(review): presumably this undoes escaping applied by the parent
        serializer so LaTeX commands read naturally -- confirm against
        ``BaseCVResult._to_str``.
        """
        return super()._to_str(*args, **kwargs).replace("\\\\", "\\")

    def _to_img(
        self,
    ):
        """Draw formula on image.

        Builds a side-by-side picture: the input image (cropped to its
        non-white content) on the left and the rendered ``rec_formula`` on the
        right, separated by a 10 px white gap.

        Returns:
            PIL.Image.Image: the composed image. If the LaTeX environment check
            fails, the untouched input image is returned; if rendering the
            recognised formula fails, the cropped input image is returned.
        """
        image = Image.fromarray(self._input_img)
        try:
            env_valid()
        except subprocess.CalledProcessError:
            logging.warning(
                "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
            )
            return image

        rec_formula = str(self["rec_formula"])
        image = np.array(image.convert("RGB"))
        xywh = crop_white_area(image)
        if xywh is not None:
            x, y, w, h = xywh
            image = image[y : y + h, x : x + w]
        image = Image.fromarray(image)
        image_width, image_height = image.size
        box = [[0, 0], [image_width, 0], [image_width, image_height], [0, image_height]]

        # Only the rendering call can raise CalledProcessError, so keep the
        # try body limited to it.
        try:
            img_formula = draw_formula_module(
                image.size, box, rec_formula, is_debug=False
            )
        except subprocess.CalledProcessError:
            logging.warning("Syntax error detected in formula, rendering failed.")
            return image

        img_formula = Image.fromarray(img_formula)
        render_width, render_height = img_formula.size
        # Scale the input to the rendered formula's height, keeping its aspect
        # ratio. Clamp to 1 px: a very tall, thin input would otherwise
        # truncate to width 0, which PIL's resize rejects.
        resize_height = render_height
        resize_width = max(1, int(resize_height * image_width / image_height))
        image = image.resize((resize_width, resize_height), Image.LANCZOS)

        new_image_width = image.width + int(render_width) + 10
        new_image = Image.new(
            "RGB", (new_image_width, render_height), (255, 255, 255)
        )
        new_image.paste(image, (0, 0))
        new_image.paste(img_formula, (image.width + 10, 0))
        return new_image
def get_align_equation(equation):
    """Return ``equation`` as LaTeX body text ready to be placed in a document.

    The input is converted with ``str()`` and a trailing newline is appended.
    If the text already contains a ``begin{align}`` or ``begin{align*}``
    marker it is returned in that form; otherwise the stripped text is wrapped
    in an ``equation`` environment with ``\\nonumber`` appended to it.

    Args:
        equation: formula source; any object accepted by ``str()``.

    Returns:
        str: the LaTeX snippet, always terminated by a newline.
    """
    text = str(equation) + "\n"
    align_markers = (r"begin{align}", r"begin{align*}")
    if any(marker in text for marker in align_markers):
        # Already carries its own display environment; leave it alone.
        return text

    wrapped_lines = [
        r"\begin{equation}",
        text.strip() + r"\nonumber",
        r"\end{equation}",
    ]
    return "\n".join(wrapped_lines) + "\n"
def generate_tex_file(tex_file_path, equation):
    """Write a standalone LaTeX document containing ``equation``.

    The document consists of a fixed ``article`` preamble, the equation as
    prepared by ``get_align_equation`` inside a ``large`` block, and the
    matching closing lines.

    Args:
        tex_file_path: destination path of the ``.tex`` file (overwritten).
        equation: formula source passed to ``get_align_equation``.
    """
    preamble_lines = (
        r"\documentclass{article}",
        r"\usepackage{cite}",
        r"\usepackage{amsmath,amssymb,amsfonts}",
        r"\usepackage{graphicx}",
        r"\usepackage{textcomp}",
        r"\DeclareMathSizes{14}{14}{9.8}{7}",
        r"\pagestyle{empty}",
        r"\begin{document}",
        r"\begin{large}",
    )
    closing_lines = (
        r"\end{large}",
        r"\end{document}",
    )
    start_template = "".join(line + "\n" for line in preamble_lines)
    end_template = "".join(line + "\n" for line in closing_lines)

    # Explicit UTF-8 so a formula with non-ASCII characters is written the
    # same way on every platform instead of depending on the locale encoding.
    with open(tex_file_path, "w", encoding="utf-8") as fp:
        fp.write(start_template)
        fp.write(get_align_equation(equation))
        fp.write(end_template)
def generate_pdf_file(tex_path, pdf_dir, is_debug=False):
    """Compile ``tex_path`` with ``pdflatex`` into ``pdf_dir``.

    Does nothing when ``tex_path`` does not exist.

    Args:
        tex_path: path of the ``.tex`` file to compile.
        pdf_dir: directory passed to pdflatex as ``-output-directory``.
        is_debug: when true, pdflatex output is left attached to the parent's
            stdout/stderr; otherwise it is discarded.

    Raises:
        subprocess.CalledProcessError: pdflatex exited with a non-zero status,
            or could not be started at all (reported with return code 127, the
            status a shell gives for a command it cannot find).
    """
    if not os.path.exists(tex_path):
        return

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim.
    command = [
        "pdflatex",
        "-halt-on-error",
        "-output-directory={}".format(pdf_dir),
        str(tex_path),
    ]
    stream_kwargs = {}
    if not is_debug:
        # DEVNULL rather than PIPE: nothing reads the pipe, so a chatty
        # pdflatex run could fill the OS pipe buffer and block forever.
        stream_kwargs = {"stdout": subprocess.DEVNULL, "stderr": subprocess.STDOUT}

    try:
        subprocess.check_call(command, **stream_kwargs)
    except OSError as err:
        # Without a shell a missing executable surfaces as OSError; callers
        # only handle CalledProcessError, so keep that contract.
        raise subprocess.CalledProcessError(127, command) from err
def crop_white_area(image):
    """Locate the bounding rectangle of everything that is not near-white.

    The image is converted to ``uint8``, turned to grayscale with
    ``cv2.COLOR_BGR2GRAY`` and inverse-thresholded at 240, so pixels whose
    gray value is at most 240 count as content. The external contours of that
    mask are merged into one bounding rectangle.

    Args:
        image: colour image accepted by ``np.array``; the grayscale conversion
            treats the channels as BGR.

    Returns:
        list | None: ``[x, y, w, h]`` of the content area, or ``None`` when no
        contour is found.
    """
    pixels = np.array(image).astype("uint8")
    grayscale = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    _, content_mask = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY_INV)
    contours, _ = cv2.findContours(
        content_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if len(contours) == 0:
        return None
    # Stack the points of every contour so one rectangle encloses them all.
    all_points = np.concatenate(contours)
    return list(cv2.boundingRect(all_points))
def pdf2img(pdf_path, img_path, is_padding=False):
    """Rasterize a single-page PDF and return its content area as an image.

    The page is rendered at 2x zoom, written to ``img_path`` as JPEG, read
    back with ``cv2.imread`` and cropped to its non-white area via
    ``crop_white_area``.

    Args:
        pdf_path: path of the PDF to open.
        img_path: path of the intermediate image file that is written.
        is_padding: when true, add a 30 px white border on every side.

    Returns:
        numpy.ndarray | None: the image as read by ``cv2.imread`` (cropped and
        optionally padded), or ``None`` when the PDF does not have exactly one
        page or the written image cannot be read back.
    """
    import fitz

    pdf_doc = fitz.open(pdf_path)
    try:
        if pdf_doc.page_count != 1:
            return None
        page = pdf_doc[0]
        zoom_x = 2
        zoom_y = 2
        mat = fitz.Matrix(zoom_x, zoom_y).prerotate(0)
        pix = page.get_pixmap(matrix=mat, alpha=False)

        # img_path is a file, so make sure its *parent* directory exists;
        # creating a directory at img_path itself would block the write.
        out_dir = os.path.dirname(img_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # NOTE(review): _writeIMG is a private PyMuPDF method; 7 is the format
        # code and 100 the JPEG quality passed by the original code. Consider
        # the public Pixmap.save once the minimum PyMuPDF version is confirmed.
        pix._writeIMG(img_path, 7, 100)
    finally:
        # Release the file handle so the PDF can be deleted by the caller
        # (an open handle blocks temp-directory cleanup on Windows).
        pdf_doc.close()

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        return None

    xywh = crop_white_area(img)
    if xywh is not None:
        x, y, w, h = xywh
        img = img[y : y + h, x : x + w]
        if is_padding:
            img = cv2.copyMakeBorder(
                img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
            )
    return img
def draw_formula_module(img_size, box, formula, is_debug=False):
    """draw box formula for module

    Renders ``formula`` through a temporary ``.tex`` -> ``.pdf`` -> ``.jpg``
    chain and returns the rendered picture at its own size.

    Args:
        img_size: ``(width, height)`` used only for the fallback image.
        box: four corner points used only for the fallback image.
        formula: LaTeX source of the formula.
        is_debug: forwarded to ``generate_pdf_file``.

    Returns:
        numpy.ndarray: the image returned by ``pdf2img``, or, when no image
        could be produced, the "Rendering Failed" text image from
        ``draw_box_txt_fine``.

    Raises:
        subprocess.CalledProcessError: propagated from ``generate_pdf_file``
            when the LaTeX compilation fails.
    """
    with tempfile.TemporaryDirectory() as td:
        tex_file_path = os.path.join(td, "temp.tex")
        pdf_file_path = os.path.join(td, "temp.pdf")
        img_file_path = os.path.join(td, "temp.jpg")

        generate_tex_file(tex_file_path, formula)
        if os.path.exists(tex_file_path):
            generate_pdf_file(tex_file_path, td, is_debug)

        formula_img = None
        if os.path.exists(pdf_file_path):
            formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)

        if formula_img is not None:
            return formula_img
        return draw_box_txt_fine(
            img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
        )
def env_valid():
    """Run the rendering chain once on a trivial formula as an environment check.

    Writes ``a+b=c`` to a temporary ``.tex`` file, compiles it with
    ``generate_pdf_file`` and, if a PDF appears, converts it with ``pdf2img``.
    Nothing is returned; a broken tool chain shows up as an exception raised
    by one of those helpers (callers in this file catch
    ``subprocess.CalledProcessError``).
    """
    probe_formula = "a+b=c"
    with tempfile.TemporaryDirectory() as workdir:
        tex_path = os.path.join(workdir, "temp.tex")
        pdf_path = os.path.join(workdir, "temp.pdf")
        jpg_path = os.path.join(workdir, "temp.jpg")

        generate_tex_file(tex_path, probe_formula)
        if os.path.exists(tex_path):
            generate_pdf_file(tex_path, workdir, False)
        if os.path.exists(pdf_path):
            # Result is irrelevant; the call only has to succeed.
            pdf2img(pdf_path, jpg_path, is_padding=False)
def draw_box_formula_fine(img_size, box, formula, is_debug=False):
    """draw box formula for pipeline

    Renders ``formula`` through a temporary ``.tex`` -> ``.pdf`` -> ``.jpg``
    chain, scales the result to the height of ``box`` and warps it onto a
    white canvas of ``img_size`` so that it lands on ``box``.

    Args:
        img_size: output canvas size handed to ``cv2.warpPerspective``.
        box: four corner points; indices 0, 1 and 3 are used as the top-left,
            top-right and bottom-left corners when measuring the box.
        formula: LaTeX source of the formula.
        is_debug: forwarded to ``generate_pdf_file``.

    Returns:
        numpy.ndarray: the warped formula image, or the "Rendering Failed"
        text image from ``draw_box_txt_fine`` when no rendering was produced.
    """
    corner, right_corner, lower_corner = box[0], box[1], box[3]
    box_height = int(
        math.sqrt(
            (corner[0] - lower_corner[0]) ** 2 + (corner[1] - lower_corner[1]) ** 2
        )
    )
    box_width = int(
        math.sqrt(
            (corner[0] - right_corner[0]) ** 2 + (corner[1] - right_corner[1]) ** 2
        )
    )

    with tempfile.TemporaryDirectory() as workdir:
        tex_path = os.path.join(workdir, "temp.tex")
        pdf_path = os.path.join(workdir, "temp.pdf")
        jpg_path = os.path.join(workdir, "temp.jpg")

        generate_tex_file(tex_path, formula)
        if os.path.exists(tex_path):
            generate_pdf_file(tex_path, workdir, is_debug)

        rendered = None
        if os.path.exists(pdf_path):
            rendered = pdf2img(pdf_path, jpg_path, is_padding=False)

        if rendered is None:
            return draw_box_txt_fine(
                img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
            )

        # Match the box height while keeping the rendering's aspect ratio.
        src_h, src_w = rendered.shape[:-1]
        scaled_w = src_w * box_height / src_h
        rendered = cv2.resize(rendered, (int(scaled_w), int(box_height)))

        # Map the axis-aligned box rectangle onto the (possibly skewed) box.
        src_rect = np.float32(
            [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
        )
        dst_quad = np.array(box, dtype=np.float32)
        transform = cv2.getPerspectiveTransform(src_rect, dst_quad)
        rendered = np.array(rendered, dtype=np.uint8)
        return cv2.warpPerspective(
            rendered,
            transform,
            img_size,
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(255, 255, 255),
        )
def draw_box_txt_fine(img_size, box, txt, font_path):
    """draw box text

    Draws ``txt`` in black on a white patch sized like ``box`` and warps that
    patch onto a white canvas of ``img_size`` so that it covers ``box``.
    Boxes that are more than twice as tall as wide (and taller than 30) get
    the text drawn sideways and rotated into place.

    Args:
        img_size: output canvas size handed to ``cv2.warpPerspective``.
        box: four corner points; indices 0, 1 and 3 are used as the top-left,
            top-right and bottom-left corners when measuring the box.
        txt: text to draw; nothing is drawn when it is empty.
        font_path: font file passed to ``create_font``.

    Returns:
        numpy.ndarray: the warped ``uint8`` image.
    """
    corner, right_corner, lower_corner = box[0], box[1], box[3]
    box_height = int(
        math.sqrt(
            (corner[0] - lower_corner[0]) ** 2 + (corner[1] - lower_corner[1]) ** 2
        )
    )
    box_width = int(
        math.sqrt(
            (corner[0] - right_corner[0]) ** 2 + (corner[1] - right_corner[1]) ** 2
        )
    )

    is_vertical = box_height > 2 * box_width and box_height > 30
    # A vertical box is drawn on a swapped-size patch and rotated afterwards.
    patch_size = (box_height, box_width) if is_vertical else (box_width, box_height)

    patch = Image.new("RGB", patch_size, (255, 255, 255))
    painter = ImageDraw.Draw(patch)
    if txt:
        fitted_font = create_font(txt, patch_size, font_path)
        painter.text([0, 0], txt, fill=(0, 0, 0), font=fitted_font)
    if is_vertical:
        patch = patch.transpose(Image.ROTATE_270)

    src_rect = np.float32(
        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
    )
    dst_quad = np.array(box, dtype=np.float32)
    transform = cv2.getPerspectiveTransform(src_rect, dst_quad)
    patch_pixels = np.array(patch, dtype=np.uint8)
    return cv2.warpPerspective(
        patch_pixels,
        transform,
        img_size,
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
def create_font(txt, sz, font_path):
    """create font

    Loads ``font_path`` at 80% of the target height and, if ``txt`` would be
    wider than the target width at that size, reloads it scaled down
    proportionally.

    Args:
        txt: text that has to fit.
        sz: ``(width, height)`` of the area the text is drawn into.
        font_path: path of the TrueType font file.

    Returns:
        PIL.ImageFont.FreeTypeFont: the font at the chosen size (at least 1).
    """
    # Clamp to 1: a very small area would truncate the size to 0, which
    # ImageFont.truetype does not accept.
    font_size = max(1, int(sz[1] * 0.8))
    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")

    # FreeTypeFont.getsize is gone in Pillow 10; use getlength from there on.
    if int(PIL.__version__.split(".")[0]) < 10:
        length = font.getsize(txt)[0]
    else:
        length = font.getlength(txt)

    if length > sz[0]:
        font_size = max(1, int(font_size * sz[0] / length))
        font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
    return font
|