Browse Source

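Fix remove error: correct misplaced parenthesis in `len(dropped_spans > 0)` and guard list removals with length checks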

赵小蒙 1 year ago
parent
commit
1936703b71

+ 4 - 3
magic_pdf/pre_proc/ocr_detect_all_bboxes.py

@@ -73,9 +73,9 @@ def remove_need_drop_blocks(all_bboxes, discarded_blocks):
             if calculate_overlap_area_in_bbox1_area_ratio(block_bbox, discarded_block['bbox']) > 0.6:
                 need_remove.append(block)
 
-    for block in need_remove:
-        all_bboxes.remove(block)
-
+    if len(need_remove) > 0:
+        for block in need_remove:
+            all_bboxes.remove(block)
     return all_bboxes
 
 
@@ -92,6 +92,7 @@ def remove_overlaps_min_blocks(all_bboxes):
                     bbox_to_remove = next((block for block in all_bboxes if block[:4] == overlap_box), None)
                     if bbox_to_remove is not None:
                         need_remove.append(bbox_to_remove)
+
     if len(need_remove) > 0:
         for block in need_remove:
             all_bboxes.remove(block)

+ 5 - 3
magic_pdf/pre_proc/ocr_span_list_modify.py

@@ -18,10 +18,11 @@ def remove_overlaps_min_spans(spans):
                     if bbox_to_remove is not None:
                         dropped_spans.append(bbox_to_remove)
 
-    if len(dropped_spans > 0):
+    if len(dropped_spans) > 0:
         for dropped_span in dropped_spans:
             spans.remove(dropped_span)
             dropped_span['tag'] = DropTag.SPAN_OVERLAP
+
     return spans, dropped_spans
 
 
@@ -35,8 +36,9 @@ def remove_spans_by_bboxes(spans, need_remove_spans_bboxes):
                 need_remove_spans.append(span)
                 break
 
-    for span in need_remove_spans:
-        spans.remove(span)
+    if len(need_remove_spans) > 0:
+        for span in need_remove_spans:
+            spans.remove(span)
 
     return spans