Browse Source

refactor(ocr): adjust OCR processing parameters

- Lower the Y-axis overlap threshold for merging spans into lines from 0.6 to 0.5
- Reduce the unclip ratio for OCR detection from 2.4 to 1.8
myhloli 1 year ago
parent
commit
1807126e7f
2 changed files with 2 additions and 2 deletions
  1. 1 1
      magic_pdf/model/pdf_extract_kit.py
  2. 1 1
      magic_pdf/pre_proc/ocr_dict_merge.py

+ 1 - 1
magic_pdf/model/pdf_extract_kit.py

@@ -83,7 +83,7 @@ def doclayout_yolo_model_init(weight):
     return model
 
 
-def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None, use_dilation=True, det_db_unclip_ratio=2.4):
+def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None, use_dilation=True, det_db_unclip_ratio=1.8):
     if lang is not None:
         model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh, lang=lang, use_dilation=use_dilation, det_db_unclip_ratio=det_db_unclip_ratio)
     else:

+ 1 - 1
magic_pdf/pre_proc/ocr_dict_merge.py

@@ -49,7 +49,7 @@ def merge_spans_to_line(spans):
                 continue
 
             # 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行
-            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.6):
+            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.5):
                 current_line.append(span)
             else:
                 # 否则,开始新行