Kaynağa Gözat

refactor(pdf_parse): improve code readability and maintainability

myhloli 11 ay önce
ebeveyn
işleme
97bcc8b23b
1 değiştirilmiş dosya ile 9 ekleme ve 10 silme
  1. +9 -10
      magic_pdf/pdf_parse_union_core_v2.py

+ 9 - 10
magic_pdf/pdf_parse_union_core_v2.py

@@ -179,16 +179,15 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang
     useful_spans = []
     unuseful_spans = []
     for span in spans:
-            for block in all_bboxes + all_discarded_blocks:
-                if block[7] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]:
-                    continue
-                overlap_ratio = calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block[0:4])
-                if overlap_ratio > 0.5:
-                    if block in all_bboxes:
-                        useful_spans.append(span)
-                    else:
-                        unuseful_spans.append(span)
-                    break
+        for block in all_bboxes + all_discarded_blocks:
+            if block[7] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]:
+                continue
+            if calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block[0:4]) > 0.5:
+                if block in all_bboxes:
+                    useful_spans.append(span)
+                else:
+                    unuseful_spans.append(span)
+                break
 
     new_spans = []