Browse Source

Merge pull request #1046 from myhloli/dev

refactor(ocr_dict_merge): add threshold parameter for line merging
Xiaomeng Zhao 1 year ago
parent
commit
a8ea5d4aa1
1 changed file with 2 additions and 2 deletions
  1. 2 2
      magic_pdf/pre_proc/ocr_dict_merge.py

+ 2 - 2
magic_pdf/pre_proc/ocr_dict_merge.py

@@ -24,7 +24,7 @@ def line_sort_spans_by_left_to_right(lines):
     return line_objects
 
 
-def merge_spans_to_line(spans):
+def merge_spans_to_line(spans, threshold=0.6):
     if len(spans) == 0:
         return []
     else:
@@ -49,7 +49,7 @@ def merge_spans_to_line(spans):
                 continue
 
             # 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行
-            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.5):
+            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], threshold):
                 current_line.append(span)
             else:
                 # 否则,开始新行