zhengchun
/
PaddleX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import cv2
import math
import random
import tempfile
import subprocess
import numpy as np
from PIL import Image, ImageDraw

from .base import CVResult
from ...utils import logging
from .ocr import draw_box_txt_fine
from ...utils.fonts import PINGFANG_FONT_FILE_PATH


class FormulaRecResult(CVResult):
    """Result whose image view pairs the input picture with its rendered ``rec_text``."""

    def _to_str(self, *args, **kwargs):
        """Return the parent's string form with every doubled backslash collapsed to one."""
        text = super()._to_str(*args, **kwargs)
        return text.replace("\\\\", "\\")

    def _to_img(
        self,
    ):
        """Compose the cropped input image and the rendered formula side by side.

        The input is trimmed to its non-white content, the formula stored under
        ``rec_text`` is rendered, the input is rescaled to the rendering's
        height, and both are pasted onto a white canvas with a 10 px gap.

        Returns:
            The composed PIL image, or None when rendering raises
            ``subprocess.CalledProcessError``.
        """
        source = self._img_reader.read(self["input_path"])
        latex = str(self["rec_text"])
        pixels = np.array(source.convert("RGB"))
        bounds = crop_white_area(pixels)
        if bounds is not None:
            left, top, width, height = bounds
            pixels = pixels[top : top + height, left : left + width]
        source = Image.fromarray(pixels)
        src_w, src_h = source.size
        corners = [[0, 0], [src_w, 0], [src_w, src_h], [0, src_h]]

        # Only the rendering step shells out, so it is the only possible
        # origin of CalledProcessError.
        try:
            rendered_pixels = draw_formula_module(
                source.size, corners, latex, is_debug=False
            )
        except subprocess.CalledProcessError:
            logging.warning(
                "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
            )
            return None

        rendered = Image.fromarray(rendered_pixels)
        rendered_w, rendered_h = rendered.size
        # Match the rendering's height while keeping the input's aspect ratio.
        scaled_w = int(rendered_h * src_w / src_h)
        source = source.resize((scaled_w, rendered_h), Image.LANCZOS)

        gap = 10
        canvas = Image.new(
            "RGB", (source.width + int(rendered_w) + gap, rendered_h), (255, 255, 255)
        )
        canvas.paste(source, (0, 0))
        canvas.paste(rendered, (source.width + gap, 0))
        return canvas


class FormulaResult(CVResult):
    """Result whose image view shows detected formula boxes next to their renderings."""

    def _to_str(self, *args, **kwargs):
        """Return the parent's string form with every doubled backslash collapsed to one."""
        return super()._to_str(*args, **kwargs).replace("\\\\", "\\")

    def _to_img(
        self,
    ):
        """Draw the formula boxes and their rendered formulas side by side.

        Left half: the input image blended 50/50 with a copy on which every
        polygon from ``dt_polys`` is filled with its own colour. Right half: a
        white canvas onto which each entry of ``rec_formula`` is rendered and
        warped into its polygon.

        Returns:
            A PIL image twice as wide as the input, or None when rendering
            raises ``subprocess.CalledProcessError``.
        """

        boxes = self["dt_polys"]
        formulas = self["rec_formula"]
        image = self._img_reader.read(self["input_path"])
        h, w = image.height, image.width
        img_left = image.copy()
        img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
        # A private generator yields the same colour sequence as
        # ``random.seed(0)`` without resetting the process-wide random state.
        rng = random.Random(0)
        draw_left = ImageDraw.Draw(img_left)

        # Without one formula per box, still draw every box.
        if formulas is None or len(formulas) != len(boxes):
            formulas = [None] * len(boxes)
        for box, formula in zip(boxes, formulas):
            color = (
                rng.randint(0, 255),
                rng.randint(0, 255),
                rng.randint(0, 255),
            )
            box = np.array(box)
            # ImageDraw.polygon documents a sequence of 2-tuples (or a flat
            # sequence of numbers); hand it that rather than a 2-D ndarray.
            polygon = [(x, y) for x, y in box.tolist()]
            draw_left.polygon(polygon, outline=color, width=8)
            draw_left.polygon(polygon, fill=color)
            try:
                img_right_text = draw_box_formula_fine(
                    (w, h),
                    box,
                    formula,
                    is_debug=False,
                )
            except subprocess.CalledProcessError:
                logging.warning(
                    "Please refer to 2.3 Formula Recognition Pipeline Visualization in Formula Recognition Pipeline Tutorial to install the LaTeX rendering engine at first."
                )
                return None
            contour = np.array(box, np.int32).reshape((-1, 1, 2))
            cv2.polylines(img_right_text, [contour], True, color, 1)
            # Each rendering is dark on white, so AND-ing accumulates them.
            img_right = cv2.bitwise_and(img_right, img_right_text)

        img_left = Image.blend(image, img_left, 0.5)
        img_show = Image.new("RGB", (int(w * 2), h), (255, 255, 255))
        img_show.paste(img_left, (0, 0, w, h))
        img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
        return img_show


def get_align_equation(equation):
    """Return LaTeX source for ``equation`` ready to sit in a document body.

    Text that already contains ``begin{align}`` or ``begin{align*}`` is
    returned as-is with a trailing newline appended. Anything else is stripped
    and wrapped in an unnumbered ``equation`` environment.

    Args:
        equation: The formula; converted with ``str()``.

    Returns:
        The LaTeX snippet, always ending in a newline.
    """
    text = str(equation) + "\n"
    align_markers = (r"begin{align}", r"begin{align*}")
    if any(marker in text for marker in align_markers):
        return text
    wrapped_lines = (
        r"\begin{equation}",
        text.strip() + r"\nonumber",
        r"\end{equation}",
        "",  # makes the joined result end with a newline
    )
    return "\n".join(wrapped_lines)


def generate_tex_file(tex_file_path, equation):
    """Write a minimal standalone LaTeX document containing ``equation``.

    The document uses the ``article`` class with the AMS math packages, an
    empty page style, and places the formula inside a ``large`` block.

    Args:
        tex_file_path: Destination path of the ``.tex`` file; overwritten if
            it exists.
        equation: The formula; passed through ``get_align_equation`` first.
    """
    preamble_lines = (
        r"\documentclass{article}",
        r"\usepackage{cite}",
        r"\usepackage{amsmath,amssymb,amsfonts}",
        r"\usepackage{graphicx}",
        r"\usepackage{textcomp}",
        r"\DeclareMathSizes{14}{14}{9.8}{7}",
        r"\pagestyle{empty}",
        r"\begin{document}",
        r"\begin{large}",
    )
    start_template = "\n".join(preamble_lines) + "\n"
    end_template = r"\end{large}" + "\n" + r"\end{document}" + "\n"
    body = get_align_equation(equation)
    # Write UTF-8 explicitly: the locale default encoding can fail on, or
    # mis-encode, non-ASCII formula text.
    with open(tex_file_path, "w", encoding="utf-8") as fp:
        fp.write(start_template)
        fp.write(body)
        fp.write(end_template)


def generate_pdf_file(tex_path, pdf_dir, is_debug=False):
    """Compile a ``.tex`` file to PDF with ``pdflatex``.

    Does nothing when ``tex_path`` does not exist.

    Args:
        tex_path: Path of the LaTeX source file.
        pdf_dir: Directory handed to ``pdflatex`` as ``-output-directory``.
        is_debug: When true, pdflatex output is left on the console; otherwise
            it is discarded.

    Raises:
        subprocess.CalledProcessError: If ``pdflatex`` exits with a non-zero
            status or cannot be launched at all.
    """
    if not os.path.exists(tex_path):
        return

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command = [
        "pdflatex",
        "-halt-on-error",
        "-output-directory={}".format(pdf_dir),
        tex_path,
    ]
    # Discard output via DEVNULL rather than an unread PIPE, which can block
    # the child once the pipe buffer fills up.
    sink = None if is_debug else subprocess.DEVNULL
    try:
        subprocess.check_call(command, stdout=sink, stderr=sink)
    except OSError as err:
        # Through a shell, a missing pdflatex surfaced as a non-zero exit
        # status; keep reporting it as CalledProcessError so existing callers'
        # handlers still apply (127 is the shell's "command not found").
        raise subprocess.CalledProcessError(127, command) from err


def crop_white_area(image):
    """Find the bounding rectangle of everything in ``image`` that is not near-white.

    The image is converted to gray (as if BGR-ordered) and pixels with a gray
    level above 240 are treated as background.

    Args:
        image: Array-like colour image; cast to uint8.

    Returns:
        ``[x, y, w, h]`` of the rectangle enclosing all content, or None when
        no content is found.
    """
    pixels = np.array(image).astype("uint8")
    grayscale = cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    # Inverted threshold: content becomes 255, near-white background 0.
    _, content_mask = cv2.threshold(grayscale, 240, 255, cv2.THRESH_BINARY_INV)
    outlines, _ = cv2.findContours(
        content_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if len(outlines) == 0:
        return None
    # One rectangle around the points of every outline together.
    return list(cv2.boundingRect(np.concatenate(outlines)))


def pdf2img(pdf_path, img_path, is_padding=False):
    """Rasterise a single-page PDF and crop it to its non-white content.

    The page is rendered at 2x zoom, saved to ``img_path`` as a JPEG, read
    back with OpenCV and trimmed with ``crop_white_area``.

    Args:
        pdf_path: Path of the PDF to read.
        img_path: File path the intermediate JPEG is written to.
        is_padding: When true, add a 30 px white border around the crop.

    Returns:
        The cropped image array as read by ``cv2.imread``, or None when the
        PDF does not have exactly one page, the image cannot be read back, or
        the page holds no non-white content.
    """
    import fitz

    zoom = 2
    # Close the document deterministically; an open handle can also block
    # removal of the enclosing temporary directory on some platforms.
    with fitz.open(pdf_path) as pdf_doc:
        if pdf_doc.page_count != 1:
            return None
        pix = pdf_doc[0].get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)

        # Ensure the *parent* directory exists; img_path itself is the file.
        out_dir = os.path.dirname(img_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # Public equivalent of the private ``_writeIMG(path, 7, 100)`` call:
        # JPEG output at quality 100.
        pix.save(img_path, output="jpg", jpg_quality=100)

    img = cv2.imread(img_path)
    if img is None:
        # imread signals an unreadable file by returning None.
        return None

    xywh = crop_white_area(img)
    if xywh is None:
        return None
    x, y, w, h = xywh
    img = img[y : y + h, x : x + w]
    if is_padding:
        img = cv2.copyMakeBorder(
            img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
        )
    return img


def draw_formula_module(img_size, box, formula, is_debug=False):
    """Render ``formula`` to an image via LaTeX -> PDF -> JPEG in a temp directory.

    Args:
        img_size: Canvas size forwarded to the fallback text drawer.
        box: Quadrilateral forwarded to the fallback text drawer.
        formula: LaTeX formula to render.
        is_debug: Forwarded to ``generate_pdf_file``.

    Returns:
        The cropped rendering from ``pdf2img``; when no rendering could be
        produced, the result of ``draw_box_txt_fine`` drawing the text
        "Rendering Failed" instead.

    Raises:
        subprocess.CalledProcessError: Propagated from ``generate_pdf_file``.
    """
    formula_img = None
    with tempfile.TemporaryDirectory() as td:
        tex_file_path = os.path.join(td, "temp.tex")
        pdf_file_path = os.path.join(td, "temp.pdf")
        img_file_path = os.path.join(td, "temp.jpg")
        generate_tex_file(tex_file_path, formula)
        # generate_pdf_file itself skips a missing .tex file, so no extra
        # existence check is needed here.
        generate_pdf_file(tex_file_path, td, is_debug)
        if os.path.exists(pdf_file_path):
            formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)

    if formula_img is not None:
        return formula_img
    # The fallback needs nothing from the temporary directory.
    return draw_box_txt_fine(
        img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
    )


def draw_box_formula_fine(img_size, box, formula, is_debug=False):
    """Render ``formula`` and warp it into ``box`` on a white canvas of ``img_size``.

    The rendering is scaled so its height equals the box height (first-to-last
    corner distance), then mapped by a perspective transform from an
    axis-aligned ``box_width x box_height`` rectangle onto the four box corners.

    Args:
        img_size: ``(width, height)`` of the output canvas.
        box: Four corner points; corner 0->1 gives the width edge and 0->3 the
            height edge.
        formula: LaTeX formula to render.
        is_debug: Forwarded to ``generate_pdf_file``.

    Returns:
        The canvas with the warped rendering. When no rendering could be
        produced, the result of ``draw_box_txt_fine`` drawing "Rendering
        Failed"; when the box has no area, a blank white canvas.

    Raises:
        subprocess.CalledProcessError: Propagated from ``generate_pdf_file``.
    """
    box_height = int(math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1]))
    box_width = int(math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1]))

    formula_img = None
    with tempfile.TemporaryDirectory() as td:
        tex_file_path = os.path.join(td, "temp.tex")
        pdf_file_path = os.path.join(td, "temp.pdf")
        img_file_path = os.path.join(td, "temp.jpg")
        generate_tex_file(tex_file_path, formula)
        # generate_pdf_file itself skips a missing .tex file.
        generate_pdf_file(tex_file_path, td, is_debug)
        if os.path.exists(pdf_file_path):
            formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)

    if formula_img is None:
        return draw_box_txt_fine(
            img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
        )

    if box_height < 1 or box_width < 1:
        # A zero-area box cannot hold a rendering, and a zero target size
        # would make cv2.resize raise; return an empty white canvas instead.
        canvas_w, canvas_h = img_size
        return np.full((canvas_h, canvas_w, 3), 255, dtype=np.uint8)

    formula_h, formula_w = formula_img.shape[:2]
    # Fit the box height, keep the aspect ratio, never drop below one pixel.
    resize_width = max(1, int(formula_w * box_height / formula_h))
    formula_img = cv2.resize(formula_img, (resize_width, box_height))

    pts1 = np.float32(
        [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
    )
    pts2 = np.array(box, dtype=np.float32)
    M = cv2.getPerspectiveTransform(pts1, pts2)
    return cv2.warpPerspective(
        formula_img,
        M,
        img_size,
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )