import os import cv2 import numpy as np from loguru import logger from rapid_table import RapidTable, RapidTableInput from mineru.utils.enum_class import ModelPath from mineru.utils.models_download_utils import auto_download_and_get_model_root_path class RapidTableModel(object): def __init__(self, ocr_engine): slanet_plus_model_path = os.path.join(auto_download_and_get_model_root_path(ModelPath.slanet_plus), ModelPath.slanet_plus) input_args = RapidTableInput(model_type='slanet_plus', model_path=slanet_plus_model_path) self.table_model = RapidTable(input_args) self.ocr_engine = ocr_engine def predict(self, image): bgr_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) # First check the overall image aspect ratio (height/width) img_height, img_width = bgr_image.shape[:2] img_aspect_ratio = img_height / img_width if img_width > 0 else 1.0 img_is_portrait = img_aspect_ratio > 1.2 if img_is_portrait: det_res = self.ocr_engine.ocr(bgr_image, rec=False)[0] # Check if table is rotated by analyzing text box aspect ratios is_rotated = False if det_res: vertical_count = 0 for box_ocr_res in det_res: p1, p2, p3, p4 = box_ocr_res # Calculate width and height width = p3[0] - p1[0] height = p3[1] - p1[1] aspect_ratio = width / height if height > 0 else 1.0 # Count vertical vs horizontal text boxes if aspect_ratio < 0.8: # Taller than wide - vertical text vertical_count += 1 # elif aspect_ratio > 1.2: # Wider than tall - horizontal text # horizontal_count += 1 # If we have more vertical text boxes than horizontal ones, # and vertical ones are significant, table might be rotated if vertical_count >= len(det_res) * 0.3: is_rotated = True # logger.debug(f"Text orientation analysis: vertical={vertical_count}, det_res={len(det_res)}, rotated={is_rotated}") # Rotate image if necessary if is_rotated: # logger.debug("Table appears to be in portrait orientation, rotating 90 degrees clockwise") image = cv2.rotate(np.asarray(image), cv2.ROTATE_90_CLOCKWISE) bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Continue with OCR on potentially rotated image ocr_result = self.ocr_engine.ocr(bgr_image)[0] if ocr_result: ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if len(item) == 2 and isinstance(item[1], tuple)] else: ocr_result = None if ocr_result: table_results = self.table_model(np.asarray(image), ocr_result) html_code = table_results.pred_html table_cell_bboxes = table_results.cell_bboxes logic_points = table_results.logic_points elapse = table_results.elapse return html_code, table_cell_bboxes, logic_points, elapse else: return None, None, None, None