Browse Source

refactor: optimize overlap detection logic in block_pre_proc.py for efficiency

myhloli 4 months ago
parent
commit
df15393cac
1 changed file with 26 additions and 25 deletions
  1. 26 25
      mineru/utils/block_pre_proc.py

+ 26 - 25
mineru/utils/block_pre_proc.py

@@ -213,32 +213,33 @@ def remove_overlaps_min_blocks(all_bboxes):
     #  重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。
     #  删除重叠blocks中较小的那些
     need_remove = []
-    for block1 in all_bboxes:
-        for block2 in all_bboxes:
-            if block1 != block2:
-                block1_bbox = block1[:4]
-                block2_bbox = block2[:4]
-                overlap_box = get_minbox_if_overlap_by_ratio(
-                    block1_bbox, block2_bbox, 0.8
+    for i in range(len(all_bboxes)):
+        for j in range(i + 1, len(all_bboxes)):
+            block1 = all_bboxes[i]
+            block2 = all_bboxes[j]
+            block1_bbox = block1[:4]
+            block2_bbox = block2[:4]
+            overlap_box = get_minbox_if_overlap_by_ratio(
+                block1_bbox, block2_bbox, 0.8
+            )
+            if overlap_box is not None:
+                block_to_remove = next(
+                    (block for block in all_bboxes if block[:4] == overlap_box),
+                    None,
                 )
-                if overlap_box is not None:
-                    block_to_remove = next(
-                        (block for block in all_bboxes if block[:4] == overlap_box),
-                        None,
-                    )
-                    if (
-                        block_to_remove is not None
-                        and block_to_remove not in need_remove
-                    ):
-                        large_block = block1 if block1 != block_to_remove else block2
-                        x1, y1, x2, y2 = large_block[:4]
-                        sx1, sy1, sx2, sy2 = block_to_remove[:4]
-                        x1 = min(x1, sx1)
-                        y1 = min(y1, sy1)
-                        x2 = max(x2, sx2)
-                        y2 = max(y2, sy2)
-                        large_block[:4] = [x1, y1, x2, y2]
-                        need_remove.append(block_to_remove)
+                if (
+                    block_to_remove is not None
+                    and block_to_remove not in need_remove
+                ):
+                    large_block = block1 if block1 != block_to_remove else block2
+                    x1, y1, x2, y2 = large_block[:4]
+                    sx1, sy1, sx2, sy2 = block_to_remove[:4]
+                    x1 = min(x1, sx1)
+                    y1 = min(y1, sy1)
+                    x2 = max(x2, sx2)
+                    y2 = max(y2, sy2)
+                    large_block[:4] = [x1, y1, x2, y2]
+                    need_remove.append(block_to_remove)
 
     if len(need_remove) > 0:
         for block in need_remove: