|
|
@@ -422,8 +422,8 @@ class TextFiller:
|
|
|
need_reocr = True
|
|
|
reocr_reason = "强制全量OCR"
|
|
|
else:
|
|
|
- # 1. 文本为空
|
|
|
- if not t or not t.strip():
|
|
|
+ # 1. 文本为空且置信度不是极高
|
|
|
+ if (not t or not t.strip()) and scores[i] < 0.95:
|
|
|
need_reocr = True
|
|
|
reocr_reason = "空文本"
|
|
|
# 2. 置信度过低
|
|
|
@@ -431,7 +431,7 @@ class TextFiller:
|
|
|
need_reocr = True
|
|
|
reocr_reason = "低置信度"
|
|
|
# 3. 竖排单元格 (高宽比 > 2.5) 且置信度不是极高
|
|
|
- elif h_box > w_box * 2.5 and scores[i] < 0.98:
|
|
|
+ elif h_box > w_box * 2.5 and scores[i] < 0.95:
|
|
|
need_reocr = True
|
|
|
reocr_reason = "竖排文本"
|
|
|
# 4. OCR 误合并:OCR box 跨多个单元格或过大
|
|
|
@@ -468,8 +468,9 @@ class TextFiller:
|
|
|
logger.debug(f"单元格({texts[i] if i < len(texts) and len(texts[i]) else 'empty'}) {i} 裁剪后图像过小,放大至 {cw}x{ch} 像素")
|
|
|
|
|
|
# 竖排文本旋转为横排
|
|
|
- if ch > cw * 2.0:
|
|
|
- cell_img = cv2.rotate(cell_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
|
|
|
+ # 由于表格已经是正视的,不需要再考虑旋转角度
|
|
|
+ # if ch > cw * 2.0:
|
|
|
+ # cell_img = cv2.rotate(cell_img, cv2.ROTATE_90_COUNTERCLOCKWISE)
|
|
|
|
|
|
crop_list.append(cell_img)
|
|
|
crop_indices.append(i)
|