Răsfoiți Sursa

fix: improve docstring for gather_ocr_list_by_row and refactor image loading logic in wired_table_rec_utils.py

myhloli 3 luni în urmă
părinte
comite
86b20f3283

+ 9 - 2
mineru/model/table/rec/unet_table/table_recover_utils.py

@@ -223,8 +223,15 @@ def match_ocr_cell(dt_rec_boxes: List[List[Union[Any, str]]], pred_bboxes: np.nd
 
 def gather_ocr_list_by_row(ocr_list: List[Any], threshold: float = 0.2) -> List[Any]:
     """
-    :param ocr_list: [[[xmin,ymin,xmax,ymax], text]]
-    :return:
+    Groups OCR results by row based on the vertical (y-axis) overlap of their bounding boxes.
+    Args:
+        ocr_list (List[Any]): A list of OCR results, where each item is a list containing a bounding box
+            in the format [xmin, ymin, xmax, ymax] and the recognized text.
+        threshold (float, optional): The threshold for determining if two boxes are in the same row,
+            based on their y-axis overlap. Default is 0.2.
+    Returns:
+        List[Any]: A new list of OCR results where texts in the same row are merged, and their bounding
+            boxes are updated to encompass the merged text.
     """
     for i in range(len(ocr_list)):
         if not ocr_list[i]:

+ 8 - 9
mineru/model/table/rec/unet_table/wired_table_rec_utils.py

@@ -115,11 +115,6 @@ class LoadImage:
         pass
 
     def __call__(self, img: InputType) -> np.ndarray:
-        if not isinstance(img, InputType.__args__):
-            raise LoadImageError(
-                f"The img type {type(img)} does not in {InputType.__args__}"
-            )
-
         img = self.load_img(img)
         img = self.convert_img(img)
         return img
@@ -133,14 +128,18 @@ class LoadImage:
                 raise LoadImageError(f"cannot identify image file {img}") from e
             return img
 
-        if isinstance(img, bytes):
-            img = np.array(Image.open(BytesIO(img)))
+        elif isinstance(img, bytes):
+            try:
+                img = np.array(Image.open(BytesIO(img)))
+            except UnidentifiedImageError as e:
+                raise LoadImageError(f"cannot identify image from bytes data") from e
             return img
 
-        if isinstance(img, np.ndarray):
+        elif isinstance(img, np.ndarray):
             return img
 
-        raise LoadImageError(f"{type(img)} is not supported!")
+        else:
+            raise LoadImageError(f"{type(img)} is not supported!")
 
     def convert_img(self, img: np.ndarray):
         if img.ndim == 2: