пре 5 месеци · 975ab2f230
--- a/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
@@ -424,26 +424,26 @@ class TextFiller:
 
				                     need_reocr = True
			
 
				                     reocr_reason = "强制全量OCR"
			
 
				                 else:
			
 
				-                    # 1. 文本为空且置信度不是极高
			
 
				-                    if (not t or not t.strip()) and scores[i] < 0.95:
			
 
				+                    # 1. OCR 误合并：OCR box 跨多个单元格或过大, 跨单元格中的一个单元格的文本可能是''
			
 
				+                    if i in need_reocr_indices:
			
 
				+                        need_reocr = True
			
 
				+                        reocr_reason = "OCR误合并"
			
 
				+                    # 2. 文本为空且置信度不是极高
			
 
				+                    elif (not t or not t.strip()) and scores[i] < 0.95:
			
 
				                         if pdf_type == 'txt':
			
 
				                             # PDF文本模式下，空文本不触发二次OCR
			
 
				                             need_reocr = False
			
 
				                         else:
			
 
				                             need_reocr = True
			
 
				                             reocr_reason = "空文本"
			
 
				-                    # 2. 置信度过低
			
 
				+                    # 3. 置信度过低
			
 
				                     elif scores[i] < trigger_score_thresh:
			
 
				                         need_reocr = True
			
 
				                         reocr_reason = "低置信度"
			
 
				-                    # 3. 竖排单元格 (高宽比 > 2.5) 且置信度不是极高
			
 
				+                    # 4. 竖排单元格 (高宽比 > 2.5) 且置信度不是极高
			
 
				                     elif h_box > w_box * 2.5 and scores[i] < 0.95:
			
 
				                         need_reocr = True
			
 
				                         reocr_reason = "竖排文本"
			
 
				-                    # 4. OCR 误合并：OCR box 跨多个单元格或过大
			
 
				-                    elif i in need_reocr_indices:
			
 
				-                        need_reocr = True
			
 
				-                        reocr_reason = "OCR误合并"
			
 
				 
			
 
				                 if not need_reocr:
			
 
				                     continue