6 месяцев назад · 752de6fa64
--- a/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
@@ -422,8 +422,8 @@ class TextFiller:
 
				                     need_reocr = True
			
 
				                     reocr_reason = "强制全量OCR"
			
 
				                 else:
			
 
				-                    # 1. 文本为空
			
 
				-                    if not t or not t.strip():
			
 
				+                    # 1. 文本为空且置信度不是极高
			
 
				+                    if (not t or not t.strip()) and scores[i] < 0.95:
			
 
				                         need_reocr = True
			
 
				                         reocr_reason = "空文本"
			
 
				                     # 2. 置信度过低
			
@@ -431,7 +431,7 @@ class TextFiller:
 
				                         need_reocr = True
			
 
				                         reocr_reason = "低置信度"
			
 
				                     # 3. 竖排单元格 (高宽比 > 2.5) 且置信度不是极高
			
 
				-                    elif h_box > w_box * 2.5 and scores[i] < 0.98:
			
 
				+                    elif h_box > w_box * 2.5 and scores[i] < 0.95:
			
 
				                         need_reocr = True
			
 
				                         reocr_reason = "竖排文本"
			
 
				                     # 4. OCR 误合并：OCR box 跨多个单元格或过大
			
@@ -468,8 +468,9 @@ class TextFiller:
 
				                     logger.debug(f"单元格({texts[i] if i < len(texts) and len(texts[i]) else 'empty'}) {i} 裁剪后图像过小，放大至 {cw}x{ch} 像素")
			
 
				 
			
 
				                 # 竖排文本旋转为横排
			
 
				-                if ch > cw * 2.0:
			
 
				-                    cell_img = cv2.rotate(cell_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
			
 
				+                # 由于表格已经是正视的，不需要再考虑旋转角度
			
 
				+                # if ch > cw * 2.0:
			
 
				+                #     cell_img = cv2.rotate(cell_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
			
 
				 
			
 
				                 crop_list.append(cell_img)
			
 
				                 crop_indices.append(i)