1 year ago · 1807126e7f
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -83,7 +83,7 @@ def doclayout_yolo_model_init(weight):
 
				     return model
			
 
				 
			
 
				 
			
 
				-def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None, use_dilation=True, det_db_unclip_ratio=2.4):
			
 
				+def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None, use_dilation=True, det_db_unclip_ratio=1.8):
			
 
				     if lang is not None:
			
 
				         model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh, lang=lang, use_dilation=use_dilation, det_db_unclip_ratio=det_db_unclip_ratio)
			
 
				     else:
			
--- a/magic_pdf/pre_proc/ocr_dict_merge.py
+++ b/magic_pdf/pre_proc/ocr_dict_merge.py
@@ -49,7 +49,7 @@ def merge_spans_to_line(spans):
 
				                 continue
			
 
				 
			
 
				             # 如果当前的span与当前行的最后一个span在y轴上重叠，则添加到当前行
			
 
				-            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.6):
			
 
				+            if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.5):
			
 
				                 current_line.append(span)
			
 
				             else:
			
 
				                 # 否则，开始新行