| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- import os
- import html
- import cv2
- import numpy as np
- from loguru import logger
- from rapid_table import RapidTable, RapidTableInput
- from mineru.utils.enum_class import ModelPath
- from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
def escape_html(input_string):
    """Return ``input_string`` with HTML-special characters turned into entities.

    Delegates to :func:`html.escape` with its default settings, so ``&``,
    ``<``, ``>`` and both quote characters are replaced.
    """
    escaped = html.escape(input_string)
    return escaped
class RapidTableModel:
    """Table-structure recognizer built on RapidTable's ``slanet_plus`` model.

    Text boxes and their content come from an externally supplied OCR engine;
    the RapidTable model turns the image plus that OCR output into HTML.
    """

    # Height/width ratio above which an image is considered portrait and is
    # therefore checked for a rotated table.
    PORTRAIT_ASPECT_RATIO = 1.2
    # A detected text box whose width/height ratio is below this value is
    # counted as "vertical" (taller than wide).
    VERTICAL_BOX_ASPECT_RATIO = 0.8
    # Minimum fraction of vertical text boxes for the table to be treated as
    # rotated.
    MIN_VERTICAL_BOX_FRACTION = 0.3

    def __init__(self, ocr_engine):
        """Load the ``slanet_plus`` table model and keep the OCR engine.

        Args:
            ocr_engine: Object exposing ``ocr(image, rec=...)``. ``predict``
                reads element ``[0]`` of its return value as the list of
                results for the image.
        """
        model_root = auto_download_and_get_model_root_path(ModelPath.slanet_plus)
        slanet_plus_model_path = os.path.join(model_root, ModelPath.slanet_plus)
        input_args = RapidTableInput(model_type='slanet_plus', model_path=slanet_plus_model_path)
        self.table_model = RapidTable(input_args)
        self.ocr_engine = ocr_engine

    @classmethod
    def _boxes_look_rotated(cls, det_res):
        """Return True when enough detected text boxes are taller than wide.

        Args:
            det_res: Sequence of text boxes, each made of exactly four corner
                points ``(x, y)``. The first and third points are used as
                opposite corners. May be empty or ``None``.
        """
        if not det_res:
            return False

        vertical_count = 0
        for box in det_res:
            p1, _, p3, _ = box
            width = p3[0] - p1[0]
            height = p3[1] - p1[1]
            # Degenerate boxes (non-positive height) are treated as square so
            # they never count as vertical.
            aspect_ratio = width / height if height > 0 else 1.0
            if aspect_ratio < cls.VERTICAL_BOX_ASPECT_RATIO:
                vertical_count += 1

        return vertical_count >= len(det_res) * cls.MIN_VERTICAL_BOX_FRACTION

    def predict(self, image):
        """Recognize the table in ``image`` and return its HTML structure.

        Portrait images are first run through text detection only; if a large
        enough share of the text boxes is vertical, the image is rotated
        90 degrees clockwise before OCR and table recognition.

        Args:
            image: Image convertible with ``np.asarray``; its channels are
                interpreted as RGB.

        Returns:
            A tuple ``(html_code, table_cell_bboxes, logic_points, elapse)``
            taken from the table model's result, or
            ``(None, None, None, None)`` when OCR yields no usable text or
            the table model raises.
        """
        rgb_image = np.asarray(image)
        bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)

        img_height, img_width = bgr_image.shape[:2]
        img_aspect_ratio = img_height / img_width if img_width > 0 else 1.0

        if img_aspect_ratio > self.PORTRAIT_ASPECT_RATIO:
            # Detection only (no recognition) is enough to judge orientation.
            det_res = self.ocr_engine.ocr(bgr_image, rec=False)[0]
            if self._boxes_look_rotated(det_res):
                rgb_image = cv2.rotate(rgb_image, cv2.ROTATE_90_CLOCKWISE)
                bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)

        # Full OCR on the (possibly rotated) image.
        ocr_result = self.ocr_engine.ocr(bgr_image)[0]
        if ocr_result:
            # Keep only well-formed (box, (text, score)) items and escape the
            # text so it can be embedded in the generated HTML.
            ocr_result = [
                [item[0], escape_html(item[1][0]), item[1][1]]
                for item in ocr_result
                if len(item) == 2 and isinstance(item[1], tuple)
            ]

        if not ocr_result:
            return None, None, None, None

        try:
            table_results = self.table_model(rgb_image, ocr_result)
            html_code = table_results.pred_html
            table_cell_bboxes = table_results.cell_bboxes
            logic_points = table_results.logic_points
            elapse = table_results.elapse
            return html_code, table_cell_bboxes, logic_points, elapse
        except Exception as e:
            # Best effort: log the failure and report "no table" to the caller.
            logger.exception(e)

        return None, None, None, None
|