Browse Source

refactor(data/utils.py): remove unnecessary decorator and improve image loading

- Remove unused @ImportPIL decorator from load_images_from_pdf function
- Update image shape handling in YOLOv11.py for better compatibility

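These changes improve code readability and performance. They are not purely cosmetic, however: split_images now unpacks image.shape[:2] as (height, width) instead of (width, height), and resize_images_to_224 no longer stacks grayscale (2-D) input into three channels before resizing.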
myhloli 8 tháng trước
mục cha
commit
4f7ef05d12

+ 0 - 1
magic_pdf/data/utils.py

@@ -29,7 +29,6 @@ def fitz_doc_to_image(doc, dpi=200) -> dict:
 
     return img_dict
 
-@ImportPIL
 def load_images_from_pdf(pdf_bytes: bytes, dpi=200, start_page_id=0, end_page_id=None) -> list:
     images = []
     with fitz.open('pdf', pdf_bytes) as doc:

+ 4 - 10
magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py

@@ -2,7 +2,7 @@
 import time
 from collections import Counter
 from uuid import uuid4
-
+import cv2
 import numpy as np
 import torch
 from loguru import logger
@@ -29,7 +29,7 @@ def split_images(image, result_images=None):
     if result_images is None:
         result_images = []
 
-    width, height = image.shape[:2]
+    height, width = image.shape[:2]
     long_side = max(width, height)  # 获取较长边长度
 
     if long_side <= 400:
@@ -68,14 +68,8 @@ def resize_images_to_224(image):
     Works directly with NumPy arrays.
     """
     try:
-        # Handle numpy array directly
-        if len(image.shape) == 3:  # Color image
-            height, width, channels = image.shape
-        else:  # Grayscale image
-            height, width = image.shape
-            image = np.stack([image] * 3, axis=2)  # Convert to RGB
-
-        import cv2
+        height, width = image.shape[:2]
+
         if width < 224 or height < 224:
             # Create black background
             new_image = np.zeros((224, 224, 3), dtype=np.uint8)