Bläddra i källkod

将 span 重叠删除的阈值从 0.8 调整为 0.5

赵小蒙 1 år sedan
förälder
incheckning
070139a5bc
1 ändrad fil med 1 tillägg och 1 borttagning
  1. 1 1
      magic_pdf/pre_proc/ocr_dict_merge.py

+ 1 - 1
magic_pdf/pre_proc/ocr_dict_merge.py

@@ -9,7 +9,7 @@ def remove_overlaps_min_spans(spans):
     for span1 in spans.copy():
     for span1 in spans.copy():
         for span2 in spans.copy():
         for span2 in spans.copy():
             if span1 != span2:
             if span1 != span2:
-                overlap_box = get_minbox_if_overlap_by_ratio(span1['bbox'], span2['bbox'], 0.8)
+                overlap_box = get_minbox_if_overlap_by_ratio(span1['bbox'], span2['bbox'], 0.5)
                 if overlap_box is not None:
                 if overlap_box is not None:
                     bbox_to_remove = next((span for span in spans if span['bbox'] == overlap_box), None)
                     bbox_to_remove = next((span for span in spans if span['bbox'] == overlap_box), None)
                     if bbox_to_remove is not None:
                     if bbox_to_remove is not None: