|
|
@@ -12,10 +12,11 @@
|
|
|
# See the License for the specific language governing permissions and
|
|
|
# limitations under the License.
|
|
|
|
|
|
-import os
|
|
|
-import sys
|
|
|
+import os, sys
|
|
|
+from typing import Any, Dict, Optional, List
|
|
|
import cv2
|
|
|
import PIL
|
|
|
+import fitz
|
|
|
import math
|
|
|
import random
|
|
|
import tempfile
|
|
|
@@ -27,6 +28,7 @@ from PIL import Image, ImageDraw, ImageFont
|
|
|
from ...common.result import BaseCVResult
|
|
|
from ....utils import logging
|
|
|
from ....utils.fonts import PINGFANG_FONT_FILE_PATH
|
|
|
+from ....utils.file_interface import custom_open
|
|
|
|
|
|
|
|
|
class FormulaRecResult(BaseCVResult):
|
|
|
@@ -35,8 +37,18 @@ class FormulaRecResult(BaseCVResult):
|
|
|
|
|
|
def _to_img(
|
|
|
self,
|
|
|
- ):
|
|
|
- """Draw formula on image"""
|
|
|
+ ) -> Image.Image:
|
|
|
+ """
|
|
|
+ Draws a recognized formula on an image.
|
|
|
+
|
|
|
+ This method processes an input image to recognize and render a LaTeX formula.
|
|
|
+ It overlays the rendered formula onto the input image and returns the combined image.
|
|
|
+ If the LaTeX rendering engine is not installed or a syntax error is detected,
|
|
|
+ it logs a warning and returns the original image.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ Image.Image: An image with the recognized formula rendered alongside the original image.
|
|
|
+ """
|
|
|
image = Image.fromarray(self["input_img"])
|
|
|
try:
|
|
|
env_valid()
|
|
|
@@ -77,7 +89,19 @@ class FormulaRecResult(BaseCVResult):
|
|
|
return image
|
|
|
|
|
|
|
|
|
-def get_align_equation(equation):
|
|
|
+def get_align_equation(equation: str) -> str:
|
|
|
+ """
|
|
|
+ Wraps an equation in LaTeX environment tags if not already aligned.
|
|
|
+
|
|
|
+ This function checks if a given LaTeX equation contains any alignment tags (`align` or `align*`).
|
|
|
+ If the equation does not contain these tags, it wraps the equation in `equation` and `nonumber` tags.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ equation (str): The LaTeX equation to be checked and potentially modified.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ str: The modified equation with appropriate LaTeX tags for alignment.
|
|
|
+ """
|
|
|
is_align = False
|
|
|
equation = str(equation) + "\n"
|
|
|
begin_dict = [
|
|
|
@@ -101,8 +125,19 @@ def get_align_equation(equation):
|
|
|
return equation
|
|
|
|
|
|
|
|
|
-def generate_tex_file(tex_file_path, equation):
|
|
|
- with open(tex_file_path, "w") as fp:
|
|
|
+def generate_tex_file(tex_file_path: str, equation: str) -> None:
|
|
|
+ """
|
|
|
+ Generates a LaTeX file containing a specific equation.
|
|
|
+
|
|
|
+ This function creates a LaTeX file at the specified file path, writing the necessary
|
|
|
+ LaTeX preamble and wrapping the provided equation in a document structure. The equation
|
|
|
+ is processed to ensure it includes alignment tags if necessary.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ tex_file_path (str): The file path where the LaTeX file will be saved.
|
|
|
+ equation (str): The LaTeX equation to be written into the file.
|
|
|
+ """
|
|
|
+ with custom_open(tex_file_path, "w") as fp:
|
|
|
start_template = (
|
|
|
r"\documentclass{article}" + "\n"
|
|
|
r"\usepackage{cite}" + "\n"
|
|
|
@@ -121,7 +156,24 @@ def generate_tex_file(tex_file_path, equation):
|
|
|
fp.write(end_template)
|
|
|
|
|
|
|
|
|
-def generate_pdf_file(tex_path, pdf_dir, is_debug=False):
|
|
|
+def generate_pdf_file(
|
|
|
+ tex_path: str, pdf_dir: str, is_debug: bool = False
|
|
|
+) -> Optional[bool]:
|
|
|
+ """
|
|
|
+ Generates a PDF file from a LaTeX file using pdflatex.
|
|
|
+
|
|
|
+ This function checks if the specified LaTeX file exists, and then runs pdflatex to generate a PDF file
|
|
|
+ in the specified directory. It can run in debug mode to show detailed output or in silent mode.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ tex_path (str): The path to the LaTeX file.
|
|
|
+ pdf_dir (str): The directory where the PDF file will be saved.
|
|
|
+ is_debug (bool, optional): If True, runs pdflatex with detailed output. Defaults to False.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ Optional[bool]: No branch of the code shown returns a value, so the result is always None (also when the LaTeX file does not exist).
|
|
|
+ subprocess.check_call raises subprocess.CalledProcessError if pdflatex exits with a non-zero status.
|
|
|
+ """
|
|
|
if os.path.exists(tex_path):
|
|
|
command = "pdflatex -halt-on-error -output-directory={} {}".format(
|
|
|
pdf_dir, tex_path
|
|
|
@@ -129,13 +181,27 @@ def generate_pdf_file(tex_path, pdf_dir, is_debug=False):
|
|
|
if is_debug:
|
|
|
subprocess.check_call(command, shell=True)
|
|
|
else:
|
|
|
- devNull = open(os.devnull, "w")
|
|
|
+ devNull = custom_open(os.devnull, "w")
|
|
|
subprocess.check_call(
|
|
|
command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True
|
|
|
)
|
|
|
|
|
|
|
|
|
-def crop_white_area(image):
|
|
|
+def crop_white_area(image: np.ndarray) -> Optional[List[int]]:
|
|
|
+ """
|
|
|
+ Finds and returns the bounding box of the non-white area in an image.
|
|
|
+
|
|
|
+ This function converts an image to grayscale and uses binary thresholding to
|
|
|
+ find contours. It then calculates the bounding rectangle around the non-white
|
|
|
+ areas of the image.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ image (np.ndarray): The input image as a NumPy array.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ Optional[List[int]]: A list [x, y, w, h] representing the bounding box of
|
|
|
+ the non-white area, or None if no such area is found.
|
|
|
+ """
|
|
|
image = np.array(image).astype("uint8")
|
|
|
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
|
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
|
|
|
@@ -147,8 +213,18 @@ def crop_white_area(image):
|
|
|
return None
|
|
|
|
|
|
|
|
|
-def pdf2img(pdf_path, img_path, is_padding=False):
|
|
|
- import fitz
|
|
|
+def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
|
|
|
+ """
|
|
|
+ Converts a single-page PDF to an image, optionally cropping white areas and adding padding.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ pdf_path (str): The path to the PDF file.
|
|
|
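+ img_path (str): The output image path. NOTE(review): the visible code now decodes the pixmap in memory and no longer writes to this path; confirm whether it is still used.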
|
|
|
+ is_padding (bool): If True, adds a 30-pixel white padding around the image.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page or no non-white area is found.
|
|
|
+ """
|
|
|
|
|
|
pdfDoc = fitz.open(pdf_path)
|
|
|
if pdfDoc.page_count != 1:
|
|
|
@@ -160,11 +236,10 @@ def pdf2img(pdf_path, img_path, is_padding=False):
|
|
|
zoom_y = 2
|
|
|
mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
|
|
|
pix = page.get_pixmap(matrix=mat, alpha=False)
|
|
|
- if not os.path.exists(img_path):
|
|
|
- os.makedirs(img_path)
|
|
|
-
|
|
|
- pix._writeIMG(img_path, 7, 100)
|
|
|
- img = cv2.imread(img_path)
|
|
|
+ getpngdata = pix.tobytes(output="png")
|
|
|
+ # decode as np.uint8
|
|
|
+ image_array = np.frombuffer(getpngdata, dtype=np.uint8)
|
|
|
+ img = cv2.imdecode(image_array, cv2.IMREAD_ANYCOLOR)
|
|
|
xywh = crop_white_area(img)
|
|
|
|
|
|
if xywh is not None:
|
|
|
@@ -178,8 +253,21 @@ def pdf2img(pdf_path, img_path, is_padding=False):
|
|
|
return None
|
|
|
|
|
|
|
|
|
-def draw_formula_module(img_size, box, formula, is_debug=False):
|
|
|
- """draw box formula for module"""
|
|
|
+def draw_formula_module(
|
|
|
+ img_size: tuple, box: list, formula: str, is_debug: bool = False
|
|
|
+):
|
|
|
+ """
|
|
|
+ Draw box formula for module.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ img_size (tuple): The size of the image as (width, height).
|
|
|
+ box (list): The coordinates for the bounding box.
|
|
|
+ formula (str): The LaTeX formula to render.
|
|
|
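+ is_debug (bool): Debug flag. NOTE(review): intermediate files live in a TemporaryDirectory, so they are presumably not retained; likely forwarded to generate_pdf_file to show pdflatex output. Confirm.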
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ np.ndarray: The resulting image with the formula or an error message.
|
|
|
+ """
|
|
|
box_width, box_height = img_size
|
|
|
with tempfile.TemporaryDirectory() as td:
|
|
|
tex_file_path = os.path.join(td, "temp.tex")
|
|
|
@@ -200,7 +288,13 @@ def draw_formula_module(img_size, box, formula, is_debug=False):
|
|
|
return img_right_text
|
|
|
|
|
|
|
|
|
-def env_valid():
|
|
|
+def env_valid() -> bool:
|
|
|
+ """
|
|
|
+ Validates if the environment is correctly set up to convert LaTeX formulas to images.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ bool: Declared return type. NOTE(review): the visible end of this function has no return statement, so it appears to return None rather than True/False; confirm against callers.
|
|
|
+ """
|
|
|
with tempfile.TemporaryDirectory() as td:
|
|
|
tex_file_path = os.path.join(td, "temp.tex")
|
|
|
pdf_file_path = os.path.join(td, "temp.pdf")
|
|
|
@@ -214,55 +308,19 @@ def env_valid():
|
|
|
formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
|
|
|
|
|
|
|
|
|
-def draw_box_formula_fine(img_size, box, formula, is_debug=False):
|
|
|
- """draw box formula for pipeline"""
|
|
|
- box_height = int(
|
|
|
- math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
|
|
|
- )
|
|
|
- box_width = int(
|
|
|
- math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
|
|
|
- )
|
|
|
- with tempfile.TemporaryDirectory() as td:
|
|
|
- tex_file_path = os.path.join(td, "temp.tex")
|
|
|
- pdf_file_path = os.path.join(td, "temp.pdf")
|
|
|
- img_file_path = os.path.join(td, "temp.jpg")
|
|
|
- generate_tex_file(tex_file_path, formula)
|
|
|
- if os.path.exists(tex_file_path):
|
|
|
- generate_pdf_file(tex_file_path, td, is_debug)
|
|
|
- formula_img = None
|
|
|
- if os.path.exists(pdf_file_path):
|
|
|
- formula_img = pdf2img(pdf_file_path, img_file_path, is_padding=False)
|
|
|
- if formula_img is not None:
|
|
|
- formula_h, formula_w = formula_img.shape[:-1]
|
|
|
- resize_height = box_height
|
|
|
- resize_width = formula_w * resize_height / formula_h
|
|
|
- formula_img = cv2.resize(
|
|
|
- formula_img, (int(resize_width), int(resize_height))
|
|
|
- )
|
|
|
- formula_h, formula_w = formula_img.shape[:-1]
|
|
|
- pts1 = np.float32(
|
|
|
- [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]]
|
|
|
- )
|
|
|
- pts2 = np.array(box, dtype=np.float32)
|
|
|
- M = cv2.getPerspectiveTransform(pts1, pts2)
|
|
|
- formula_img = np.array(formula_img, dtype=np.uint8)
|
|
|
- img_right_text = cv2.warpPerspective(
|
|
|
- formula_img,
|
|
|
- M,
|
|
|
- img_size,
|
|
|
- flags=cv2.INTER_NEAREST,
|
|
|
- borderMode=cv2.BORDER_CONSTANT,
|
|
|
- borderValue=(255, 255, 255),
|
|
|
- )
|
|
|
- else:
|
|
|
- img_right_text = draw_box_txt_fine(
|
|
|
- img_size, box, "Rendering Failed", PINGFANG_FONT_FILE_PATH
|
|
|
- )
|
|
|
- return img_right_text
|
|
|
+def draw_box_txt_fine(img_size: tuple, box: list, txt: str, font_path: str):
|
|
|
+ """
|
|
|
+ Draw box text.
|
|
|
|
|
|
+ Args:
|
|
|
+ img_size (tuple): Size of the image as (width, height).
|
|
|
+ box (list): List of four points defining the box, each point is a tuple (x, y).
|
|
|
+ txt (str): The text to draw inside the box.
|
|
|
+ font_path (str): Path to the font file to be used for drawing text.
|
|
|
|
|
|
-def draw_box_txt_fine(img_size, box, txt, font_path):
|
|
|
- """draw box text"""
|
|
|
+ Returns:
|
|
|
+ np.ndarray: Image array with the text drawn and transformed to fit the box.
|
|
|
+ """
|
|
|
box_height = int(
|
|
|
math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
|
|
|
)
|
|
|
@@ -302,8 +360,18 @@ def draw_box_txt_fine(img_size, box, txt, font_path):
|
|
|
return img_right_text
|
|
|
|
|
|
|
|
|
-def create_font(txt, sz, font_path):
|
|
|
- """create font"""
|
|
|
+def create_font(txt: str, sz: tuple, font_path: str) -> ImageFont.FreeTypeFont:
|
|
|
+ """
|
|
|
+ Creates a font object with a size that ensures the text fits within the specified dimensions.
|
|
|
+
|
|
|
+ Args:
|
|
|
+ txt (str): The text to fit.
|
|
|
+ sz (tuple): The target size as (width, height).
|
|
|
+ font_path (str): The path to the font file.
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ ImageFont.FreeTypeFont: A PIL font object at the appropriate size.
|
|
|
+ """
|
|
|
font_size = int(sz[1] * 0.8)
|
|
|
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
|
|
|
if int(PIL.__version__.split(".")[0]) < 10:
|