SHA1
--- a/ocr_tools/universal_doc_parser/config/bank_statement_wired_unet.yaml
+++ b/ocr_tools/universal_doc_parser/config/bank_statement_wired_unet.yaml
@@ -36,7 +36,7 @@ table_recognition_wired:
 
															   need_ocr: true
														
 
															   row_threshold: 10
														
 
															   col_threshold: 15
														
 
															-  ocr_conf_threshold: 0.5
														
 
															+  ocr_conf_threshold: 0.8       # 单元格 OCR 置信度阈值
														
 
															   cell_crop_margin: 2
														
 
															   use_custom_postprocess: true  # 是否使用自定义后处理（默认启用）
														
--- a/ocr_tools/universal_doc_parser/core/coordinate_utils.py
+++ b/ocr_tools/universal_doc_parser/core/coordinate_utils.py
@@ -200,20 +200,20 @@ class CoordinateUtils:
 
															     def crop_region(image: np.ndarray, bbox: List[float], padding: int = 0) -> np.ndarray:
														
 
															         """
														
 
															         裁剪图像区域
														
 
															-
														
 
															+        
														
 
															         Args:
														
 
															             image: 原始图像
														
 
															             bbox: 裁剪区域 [x1, y1, x2, y2]
														
 
															             padding: 边缘padding（像素），可以为正数（扩展裁剪区域）或负数（收缩裁剪区域）
														
 
															-
														
 
															+            
														
 
															         Returns:
														
 
															             裁剪后的图像
														
 
															         """
														
 
															         if len(bbox) < 4:
														
 
															             return image
														
 
															-
														
 
															+        
														
 
															         h, w = image.shape[:2]
														
 
															-
														
 
															+        
														
 
															         # 解析padding（支持单个值或四个值）
														
 
															         if isinstance(padding, (int, float)):
														
 
															             pad_left = pad_right = pad_top = pad_bottom = int(padding)
														
@@ -238,7 +238,7 @@ class CoordinateUtils:
 
															         # 检查是否有效区域
														
 
															         if x2 <= x1 or y2 <= y1:
														
 
															             return image
														
 
															-
														
 
															+        
														
 
															         return image[y1:y2, x1:x2]
														
 
															     @staticmethod
														
--- a/ocr_tools/universal_doc_parser/core/element_processors.py
+++ b/ocr_tools/universal_doc_parser/core/element_processors.py
@@ -204,8 +204,11 @@ class ElementProcessors:
 
															         table_height = bbox[3] - bbox[1]
														
 
															         # 为倾斜图片添加padding，确保角落内容不被切掉
														
 
															-        # padding = 表格宽度的1% + 表格高度的1%，最小20像素
														
 
															-        crop_padding = max(20, int(min(table_width, table_height) * 0.01))
														
 
															+        # 使用固定的适度padding（10像素），平衡边缘保护和噪声控制
														
 
															+        # - padding太大（如20+）：会引入较多背景噪声，且坐标转换复杂度增加
														
 
															+        # - padding=0：边缘内容可能被切断，UNet边缘效应明显
														
 
															+        # - padding=10：最佳平衡点，足以保护边缘1-2个像素的偏移，噪声可控
														
 
															+        crop_padding = 10
														
 
															         cropped_table = CoordinateUtils.crop_region(image, bbox, padding=crop_padding)
														
 
															         table_angle = 0
														
--- a/ocr_tools/universal_doc_parser/main_v2.py
+++ b/ocr_tools/universal_doc_parser/main_v2.py
@@ -416,14 +416,14 @@ if __name__ == "__main__":
 
															             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_005.png",
														
 
															             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270.png",
														
 
															             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
														
 
															-            # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
														
 
															-            # "output_dir": "./output/2023年度报告母公司/bank_statement_wired_unet",
														
 
															+            "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
														
 
															+            "output_dir": "./output/2023年度报告母公司/bank_statement_wired_unet",
														
 
															             # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
														
 
															             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
														
 
															-            "input": "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水.pdf",
														
 
															-            "output_dir": "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/bank_statement_wired_unet",
														
 
															+            # "input": "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水.pdf",
														
 
															+            # "output_dir": "/Users/zhch158/workspace/data/流水分析/A用户_单元格扫描流水/bank_statement_wired_unet",
														
 
															             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/600916_中国黄金_2022年报_page_096.png",
														
 
															             # "output_dir": "./output/600916_中国黄金_2022年报/bank_statement_wired_unet",
														
--- a/ocr_tools/universal_doc_parser/models/adapters/mineru_wired_table.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/mineru_wired_table.py
@@ -216,6 +216,39 @@ class MinerUWiredTableRecognizer:
 
															                 preprocessed_w = int(w_up_ * scale_factor + 0.5)
														
 
															                 preprocessed_h = int(h_up_ * scale_factor + 0.5)
														
 
															+                # 关键：手动调用 resize_img 获取实际的 w_scale 和 h_scale
														
 
															+                # 因为 keep_ratio=True 时，w_scale 和 h_scale 可能略有不同
														
 
															+                try:
														
 
															+                    from mineru.model.table.rec.unet_table.utils import resize_img
														
 
															+                    img_preprocessed_test, w_scale_actual, h_scale_actual = resize_img(
														
 
															+                        img_up_, (inp_height, inp_width), keep_ratio=True
														
 
															+                    )
														
 
															+                    # 类型检查：确保是numpy数组
														
 
															+                    if isinstance(img_preprocessed_test, np.ndarray):
														
 
															+                        preprocessed_h_actual, preprocessed_w_actual = img_preprocessed_test.shape[:2]
														
 
															+                    else:
														
 
															+                        # 如果不是numpy数组，使用计算值
														
 
															+                        preprocessed_h_actual = preprocessed_h
														
 
															+                        preprocessed_w_actual = preprocessed_w
														
 
															+                    scale_diff = abs(w_scale_actual - h_scale_actual)
														
 
															+                    logger.info(
														
 
															+                        f"🔍 UNet预处理缩放因子验证: "
														
 
															+                        f"w_scale={w_scale_actual:.6f}, h_scale={h_scale_actual:.6f}, "
														
 
															+                        f"差异={scale_diff:.6f}, "
														
 
															+                        f"预处理后实际尺寸=[{preprocessed_h_actual}, {preprocessed_w_actual}]"
														
 
															+                    )
														
 
															+                    if scale_diff > 1e-6:
														
 
															+                        logger.warning(
														
 
															+                            f"⚠️ w_scale 和 h_scale 不相等！这可能导致坐标偏移。"
														
 
															+                            f"w_scale={w_scale_actual:.6f}, h_scale={h_scale_actual:.6f}"
														
 
															+                        )
														
 
															+                except Exception as e:
														
 
															+                    logger.warning(f"无法获取实际缩放因子: {e}")
														
 
															+                    w_scale_actual = scale_factor
														
 
															+                    h_scale_actual = scale_factor
														
 
															+                    preprocessed_h_actual = preprocessed_h
														
 
															+                    preprocessed_w_actual = preprocessed_w
														
 
															+                
														
 
															                 img_info = wired_rec.table_structure.preprocess(img_obj)
														
 
															                 pred_ = wired_rec.table_structure.infer(img_info)
														
@@ -224,11 +257,15 @@ class MinerUWiredTableRecognizer:
 
															                 # 调试：记录尺寸信息
														
 
															                 pred_h, pred_w = pred_.shape[:2]
														
 
															-                logger.debug(
														
 
															-                    f"UNet 推理: 上采样图像尺寸=[{h_up_}, {w_up_}], "
														
 
															-                    f"预处理后尺寸=[{preprocessed_h}, {preprocessed_w}], "
														
 
															-                    f"预测结果尺寸=[{pred_h}, {pred_w}], "
														
 
															-                    f"缩放因子={scale_factor:.6f}, upscale={upscale:.3f}"
														
 
															+                logger.info(
														
 
															+                    f"🔍 UNet 推理详细日志:\n"
														
 
															+                    f"  - 上采样图像尺寸: [{h_up_}, {w_up_}]\n"
														
 
															+                    f"  - 计算预处理后尺寸: [{preprocessed_h}, {preprocessed_w}]\n"
														
 
															+                    f"  - 实际预处理后尺寸: [{preprocessed_h_actual}, {preprocessed_w_actual}]\n"
														
 
															+                    f"  - 预测结果尺寸: [{pred_h}, {pred_w}]\n"
														
 
															+                    f"  - 计算缩放因子: {scale_factor:.6f}\n"
														
 
															+                    f"  - 实际缩放因子: w_scale={w_scale_actual:.6f}, h_scale={h_scale_actual:.6f}\n"
														
 
															+                    f"  - upscale: {upscale:.3f}"
														
 
															                 )
														
 
															                 # 关键修复：正确地将预测结果 resize 回上采样尺寸
														
@@ -237,28 +274,76 @@ class MinerUWiredTableRecognizer:
 
															                 # 所以我们应该使用 img_up_.shape 来 resize 预测结果
														
 
															                 # 但是，由于预处理时改变了图像尺寸（保持长宽比），我们需要确保 resize 是正确的
														
 
															-                # 验证：检查预测结果尺寸是否与预处理后的尺寸一致
														
 
															-                if pred_h != preprocessed_h or pred_w != preprocessed_w:
														
 
															+                # 验证：检查预测结果尺寸是否与预处理后的尺寸一致（仅用于警告）
														
 
															+                if pred_h != preprocessed_h_actual or pred_w != preprocessed_w_actual:
														
 
															                     logger.warning(
														
 
															-                        f"⚠️ 预测结果尺寸 [{pred_h}, {pred_w}] 与预处理后尺寸 [{preprocessed_h}, {preprocessed_w}] 不一致！"
														
 
															-                        f"这可能导致坐标偏移。使用预处理后尺寸进行 resize。"
														
 
															+                        f"⚠️ 预测结果尺寸 [{pred_h}, {pred_w}] 与预处理后实际尺寸 "
														
 
															+                        f"[{preprocessed_h_actual}, {preprocessed_w_actual}] 不一致！"
														
 
															+                        f"这可能导致坐标偏移。"
														
 
															+                    )
														
 
															+                
														
 
															+                # 修复：统一将预测结果resize回上采样尺寸，避免舍入误差
														
 
															+                # 理论上：target_size = pred_size / unet_scale ≈ upsampled_size
														
 
															+                # 但为了确保完全一致，直接使用上采样尺寸作为目标，避免任何舍入误差
														
 
															+                # 这样可以保证：mask坐标系 = 上采样坐标系，坐标转换链路清晰
														
 
															+                hpred_up_ = cv2.resize(hpred_, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															+                vpred_up_ = cv2.resize(vpred_, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															+                
														
 
															+                # 记录验证信息：检查理论target尺寸与实际上采样尺寸的差异
														
 
															+                # 这些差异应该非常小（<2像素），如果差异较大说明UNet预处理有问题
														
 
															+                if abs(w_scale_actual - h_scale_actual) > 1e-6:
														
 
															+                    target_w_theoretical = int(pred_w / w_scale_actual + 0.5)
														
 
															+                    target_h_theoretical = int(pred_h / h_scale_actual + 0.5)
														
 
															+                    diff_w = abs(target_w_theoretical - w_up_)
														
 
															+                    diff_h = abs(target_h_theoretical - h_up_)
														
 
															+                    if diff_w > 2 or diff_h > 2:
														
 
															+                        logger.warning(
														
 
															+                            f"⚠️ 理论resize尺寸 [{target_h_theoretical}, {target_w_theoretical}] "
														
 
															+                            f"与上采样尺寸 [{h_up_}, {w_up_}] 差异较大 (diff=[{diff_h}, {diff_w}])！"
														
 
															+                            f"w_scale={w_scale_actual:.6f}, h_scale={h_scale_actual:.6f}"
														
 
															+                        )
														
 
															+                    else:
														
 
															+                        logger.debug(
														
 
															+                            f"✓ 理论resize尺寸 [{target_h_theoretical}, {target_w_theoretical}] "
														
 
															+                            f"与上采样尺寸 [{h_up_}, {w_up_}] 一致 (diff=[{diff_h}, {diff_w}])"
														
 
															+                        )
														
 
															+                
														
 
															+                # 记录resize后的mask尺寸
														
 
															+                hpred_up_h, hpred_up_w = hpred_up_.shape[:2]
														
 
															+                vpred_up_h, vpred_up_w = vpred_up_.shape[:2]
														
 
															+                logger.info(
														
 
															+                    f"🔍 Resize后mask尺寸: "
														
 
															+                    f"hpred_up=[{hpred_up_h}, {hpred_up_w}], "
														
 
															+                    f"vpred_up=[{vpred_up_h}, {vpred_up_w}], "
														
 
															+                    f"img_up=[{h_up_}, {w_up_}]"
														
 
															+                )
														
 
															+                
														
 
															+                # 详细的坐标转换链路日志
														
 
															+                logger.info(
														
 
															+                    f"🔍 UNet推理完成 - 坐标转换链路验证:\n"
														
 
															+                    f"  [1] 原图尺寸: [{h}, {w}]\n"
														
 
															+                    f"  [2] 上采样尺寸: [{h_up_}, {w_up_}] (upscale={upscale:.3f})\n"
														
 
															+                    f"  [3] UNet输入尺寸: [{pred_h}, {pred_w}] (h_scale={h_scale_actual:.6f}, w_scale={w_scale_actual:.6f})\n"
														
 
															+                    f"  [4] Mask尺寸: [{hpred_up_h}, {hpred_up_w}] (已resize回上采样尺寸)\n"
														
 
															+                    f"  验证: 理论upscale = {h_up_ / h:.3f} (h), {w_up_ / w:.3f} (w)"
														
 
															+                )
														
 
															+                
														
 
															+                # 验证mask尺寸是否与上采样图像一致
														
 
															+                if hpred_up_h != h_up_ or hpred_up_w != w_up_:
														
 
															+                    logger.error(
														
 
															+                        f"❌ hpred_up 尺寸 [{hpred_up_h}, {hpred_up_w}] 与上采样图像尺寸 "
														
 
															+                        f"[{h_up_}, {w_up_}] 不一致！"
														
 
															+                    )
														
 
															+                if vpred_up_h != h_up_ or vpred_up_w != w_up_:
														
 
															+                    logger.error(
														
 
															+                        f"❌ vpred_up 尺寸 [{vpred_up_h}, {vpred_up_w}] 与上采样图像尺寸 "
														
 
															+                        f"[{h_up_}, {w_up_}] 不一致！"
														
 
															                     )
														
 
															-                    # 如果尺寸不一致，先 resize 到预处理后尺寸，再 resize 到上采样尺寸
														
 
															-                    # 但实际上，预测结果应该就是预处理后的尺寸，所以这个警告不应该出现
														
 
															-                    hpred_temp = cv2.resize(hpred_, (preprocessed_w, preprocessed_h), interpolation=cv2.INTER_NEAREST)
														
 
															-                    vpred_temp = cv2.resize(vpred_, (preprocessed_w, preprocessed_h), interpolation=cv2.INTER_NEAREST)
														
 
															-                    hpred_up_ = cv2.resize(hpred_temp, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															-                    vpred_up_ = cv2.resize(vpred_temp, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															-                else:
														
 
															-                    # 正常情况：预测结果就是预处理后的尺寸，直接 resize 到上采样尺寸
														
 
															-                    # 这相当于 UNet postprocess 中的逻辑：cv2.resize(pred, (ori_shape[1], ori_shape[0]))
														
 
															-                    hpred_up_ = cv2.resize(hpred_, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															-                    vpred_up_ = cv2.resize(vpred_, (w_up_, h_up_), interpolation=cv2.INTER_NEAREST)
														
 
															-                return hpred_up_, vpred_up_, img_up_
														
 
															+                return hpred_up_, vpred_up_, img_up_, w_scale_actual, h_scale_actual
														
 
															             # Step 1: 首次运行 UNet 获取初步 mask
														
 
															-            hpred_up, vpred_up, img_up = run_unet(table_image)
														
 
															+            hpred_up, vpred_up, img_up, w_scale_actual, h_scale_actual = run_unet(table_image)
														
 
															             # Step 1.1: 基于 Mask 的高精度倾斜检测与矫正
														
 
															             if self.skew_detector.enable_deskew:
														
@@ -275,7 +360,7 @@ class MinerUWiredTableRecognizer:
 
															                     h, w = table_image.shape[:2]
														
 
															                     # 重新运行 UNet (确保 mask 与矫正后的图完全对齐)
														
 
															-                    hpred_up, vpred_up, img_up = run_unet(table_image)
														
 
															+                    hpred_up, vpred_up, img_up, w_scale_actual, h_scale_actual = run_unet(table_image)
														
 
															                 else:
														
 
															                     logger.debug(f"表格倾斜 {skew_angle:.3f}° 小于阈值，无需矫正")
														
@@ -302,7 +387,7 @@ class MinerUWiredTableRecognizer:
 
															                 debug_dir = dbg.output_dir
														
 
															                 debug_prefix = f"{dbg.prefix}_grid" if dbg.prefix else "grid"
														
 
															-            # 传入原图的实际尺寸，用于计算真实的缩放比例
														
 
															+            # 传入原图的实际尺寸和UNet预处理时的缩放因子，用于计算真实的缩放比例
														
 
															             # 这样可以正确处理 UNet 预处理改变图像尺寸的情况
														
 
															             bboxes = self.grid_recovery.compute_cells_from_lines(
														
 
															                 hpred_up, 
														
@@ -310,6 +395,8 @@ class MinerUWiredTableRecognizer:
 
															                 upscale,
														
 
															                 orig_h=h,
														
 
															                 orig_w=w,
														
 
															+                unet_w_scale=w_scale_actual,
														
 
															+                unet_h_scale=h_scale_actual,
														
 
															                 debug_dir=debug_dir,
														
 
															                 debug_prefix=debug_prefix
														
 
															             )
														
@@ -350,10 +437,13 @@ class MinerUWiredTableRecognizer:
 
															             # 策略调整：默认对所有单元格进行 Cropped OCR，以解决 Header 误合并和文本分配错误问题。
														
 
															             # Full-page OCR 结果仅作为 Fallback（在 text_filling.py 中逻辑是: 如果 Cropped OCR 结果为空或低分，才保留原值）
														
 
															             if hasattr(self, 'ocr_engine') and self.ocr_engine:
														
 
															+                # 从 debug_options 中获取输出目录
														
 
															+                output_dir = dbg.output_dir if dbg and dbg.enabled else None
														
 
															                 texts = self.text_filler.second_pass_ocr_fill(
														
 
															                     table_image, bboxes_merged, texts, scores, 
														
 
															                     need_reocr_indices=need_reocr_indices,
														
 
															-                    force_all=False  # Force Per-Cell OCR
														
 
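															+                    force_all=False,  # NOTE(review): False means per-cell OCR is NOT forced for every cell; presumably only need_reocr_indices are re-OCRed - confirm against the strategy comment above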
														
 
															+                    output_dir=output_dir
														
 
															                 )
														
 
															             for i, cell in enumerate(merged_cells):
														
--- a/ocr_tools/universal_doc_parser/models/adapters/wired_table/grid_recovery.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/wired_table/grid_recovery.py
@@ -19,6 +19,8 @@ class GridRecovery:
 
															         upscale: float = 1.0,
														
 
															         orig_h: Optional[int] = None,
														
 
															         orig_w: Optional[int] = None,
														
 
															+        unet_w_scale: Optional[float] = None,
														
 
															+        unet_h_scale: Optional[float] = None,
														
 
															         debug_dir: Optional[str] = None,
														
 
															         debug_prefix: str = "",
														
 
															     ) -> List[List[float]]:
														
@@ -38,6 +40,8 @@ class GridRecovery:
 
															             upscale: 上采样比例（用于向后兼容，如果提供了 orig_h/orig_w 则会被覆盖）
														
 
															             orig_h: 原图的实际高度（用于计算真实的缩放比例）
														
 
															             orig_w: 原图的实际宽度（用于计算真实的缩放比例）
														
 
															+            unet_w_scale: UNet预处理时的宽度缩放因子（可选，用于更精确的坐标转换）
														
 
															+            unet_h_scale: UNet预处理时的高度缩放因子（可选，用于更精确的坐标转换）
														
 
															             debug_dir: 调试输出目录 (Optional)
														
 
															             debug_prefix: 调试文件名前缀 (Optional)
														
@@ -269,7 +273,7 @@ class GridRecovery:
 
															         # Step 5b Debug (After Dilation)
														
 
															         save_debug_image("step05b_dilated", line_mask)
														
 
															-        
														
 
															+
														
 
															         # 6. 反转图像
														
 
															         inv_grid = cv2.bitwise_not(line_mask)
														
@@ -279,26 +283,72 @@ class GridRecovery:
 
															         # 7. 连通域
														
 
															         num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(inv_grid, connectivity=8)
														
 
															-        # 计算真实的缩放比例
														
 
															-        # 如果提供了原图尺寸，使用真实的缩放比例；否则使用 upscale（向后兼容）
														
 
															+        # 计算从mask坐标到原图坐标的缩放比例
														
 
															+        # 核心理解：
														
 
															+        # 1. mask已经被resize回上采样尺寸，所以 mask坐标系 = 上采样坐标系
														
 
															+        # 2. 上采样图像 = 原图 × upscale
														
 
															+        # 3. 因此：scale = mask_size / orig_size = (orig_size × upscale) / orig_size = upscale
														
 
															+        # 
														
 
															+        # 实际计算时，我们使用实际的mask尺寸和原图尺寸来计算，这样可以：
														
 
															+        # - 处理任何微小的舍入误差
														
 
															+        # - 验证resize是否正确（scale应该≈upscale）
														
 
															+        # 
														
 
															+        # 注意：unet_w_scale和unet_h_scale是UNet预处理时的缩放因子（上采样→UNet输入），
														
 
															+        # 它们不应该直接用于mask→原图的坐标转换，因为mask已经被resize回上采样尺寸。
														
 
															+        # 这些参数保留仅用于调试和验证。
														
 
															+        
														
 
															         if orig_h is not None and orig_w is not None and orig_h > 0 and orig_w > 0:
														
 
															+            # 使用实际的mask尺寸和原图尺寸计算缩放比例
														
 
															             scale_h = h / orig_h
														
 
															             scale_w = w / orig_w
														
 
															-            logger.debug(
														
 
															-                f"连通域分析: mask尺寸=[{h}, {w}], 原图尺寸=[{orig_h}, {orig_w}], "
														
 
															-                f"真实缩放比例=[{scale_h:.3f}, {scale_w:.3f}], upscale={upscale:.3f}"
														
 
															+            
														
 
															+            # Sanity check: scale should be very close to upscale (a warning is logged below when the relative difference exceeds 1%)
														
 
															+            scale_diff_h = abs(scale_h - upscale)
														
 
															+            scale_diff_w = abs(scale_w - upscale)
														
 
															+            scale_diff_ratio_h = scale_diff_h / upscale if upscale > 0 else 0
														
 
															+            scale_diff_ratio_w = scale_diff_w / upscale if upscale > 0 else 0
														
 
															+            
														
 
															+            logger.info(
														
 
															+                f"🔍 连通域坐标转换参数:\n"
														
 
															+                f"  - Mask尺寸: [{h}, {w}]\n"
														
 
															+                f"  - 原图尺寸: [{orig_h}, {orig_w}]\n"
														
 
															+                f"  - 计算scale: h={scale_h:.6f}, w={scale_w:.6f}\n"
														
 
															+                f"  - 理论upscale: {upscale:.6f}\n"
														
 
															+                f"  - 差异: h={scale_diff_h:.6f} ({scale_diff_ratio_h*100:.3f}%), w={scale_diff_w:.6f} ({scale_diff_ratio_w*100:.3f}%)"
														
 
															             )
														
 
															+            
														
 
															+            # 如果差异过大，可能表明resize有问题
														
 
															+            if scale_diff_ratio_h > 0.01 or scale_diff_ratio_w > 0.01:  # >1%差异
														
 
															+                logger.warning(
														
 
															+                    f"⚠️ 计算的scale ([{scale_h:.3f}, {scale_w:.3f}]) 与理论upscale ({upscale:.3f}) 差异超过1%！"
														
 
															+                    f"这可能表明mask尺寸不正确或resize有问题。"
														
 
															+                )
														
 
															+            
														
 
															+            # 记录UNet缩放因子（仅用于调试，不参与坐标转换）
														
 
															+            if unet_w_scale is not None and unet_h_scale is not None:
														
 
															+                logger.debug(
														
 
															+                    f"  (调试信息) UNet预处理缩放因子: h_scale={unet_h_scale:.6f}, w_scale={unet_w_scale:.6f}, "
														
 
															+                    f"upscale/unet_scale = [{upscale/unet_h_scale:.3f}, {upscale/unet_w_scale:.3f}]"
														
 
															+                )
														
 
															         else:
														
 
															+            # 如果没有提供原图尺寸，回退到使用upscale
														
 
															             scale_h = upscale
														
 
															             scale_w = upscale
														
 
															-            logger.debug(
														
 
															-                f"连通域分析: mask尺寸=[{h}, {w}], upscale={upscale:.3f}, "
														
 
															-                f"预期原图尺寸≈[{h/upscale:.1f}, {w/upscale:.1f}] (使用 upscale，未提供原图尺寸)"
														
 
															+            logger.info(
														
 
															+                f"🔍 连通域坐标转换参数:\n"
														
 
															+                f"  - Mask尺寸: [{h}, {w}]\n"
														
 
															+                f"  - 使用upscale: {upscale:.3f}\n"
														
 
															+                f"  - 预期原图尺寸≈[{h/upscale:.1f}, {w/upscale:.1f}]"
														
 
															             )
														
 
															         bboxes = []
														
 
															         # 8. 过滤
														
 
															+        # (Step 8, extended: additionally drop connected components that touch the image border)
														
 
															+        # 由于裁剪时添加了 padding=10，表格真实边框应该距离图像边缘至少 10 像素
														
 
															+        # 因此，任何直接贴着图像边缘的连通域都是 padding 区域的背景噪声
														
 
															+        edge_threshold = 5  # 距离边缘小于5px视为"贴边"
														
 
															+
														
 
															         for i in range(1, num_labels):
														
 
															             x = stats[i, cv2.CC_STAT_LEFT]
														
 
															             y = stats[i, cv2.CC_STAT_TOP]
														
@@ -306,15 +356,28 @@ class GridRecovery:
 
															             h_cell = stats[i, cv2.CC_STAT_HEIGHT]
														
 
															             area = stats[i, cv2.CC_STAT_AREA]
														
 
															+            # 过滤1：整图大小的连通域（背景）
														
 
															             if w_cell > w * 0.98 and h_cell > h * 0.98:
														
 
															                 continue
														
 
															+            
														
 
															+            # 过滤2：面积过小的噪点
														
 
															             if area < 50:
														
 
															                 continue
														
 
															-                
														
 
															-            # 使用真实的缩放比例转换为原图坐标
														
 
															+            
														
 
															+            # 过滤3：贴边连通域（padding区域的背景噪声）
														
 
															+            # 判断连通域是否贴着图像边缘
														
 
															+            is_touching_edge = (
														
 
															+                x < edge_threshold or  # 左边缘
														
 
															+                y < edge_threshold or  # 上边缘
														
 
															+                (x + w_cell) > (w - edge_threshold) or  # 右边缘
														
 
															+                (y + h_cell) > (h - edge_threshold)     # 下边缘
														
 
															+            )
														
 
															+            if is_touching_edge:
														
 
															+                continue  # 过滤掉贴边的连通域
														
 
															+            
														
 
															+            # 过滤4：原图坐标下尺寸过小的单元格
														
 
															             cell_orig_h = h_cell / scale_h
														
 
															             cell_orig_w = w_cell / scale_w
														
 
															-            
														
 
															             if cell_orig_h < 4.0 or cell_orig_w < 4.0:
														
 
															                 continue
														
@@ -326,10 +389,111 @@ class GridRecovery:
 
															             ])
														
 
															         bboxes.sort(key=lambda b: (int(b[1] / 10), b[0]))
														
 
															+
														
 
															+        # 添加详细验证
														
 
															+        if len(bboxes) > 0:
														
 
															+            min_y = min(b[1] for b in bboxes)
														
 
															+            max_y = max(b[3] for b in bboxes)
														
 
															+            coverage_h = max_y - min_y
														
 
															+            expected_h = orig_h if orig_h else h / upscale
														
 
															+            
														
 
															+            logger.info(
														
 
															+                f"📏 单元格Y轴覆盖验证:\n"
														
 
															+                f"  - 最小Y: {min_y:.1f}\n"
														
 
															+                f"  - 最大Y: {max_y:.1f}\n"
														
 
															+                f"  - 覆盖高度: {coverage_h:.1f}\n"
														
 
															+                f"  - 原图高度: {expected_h:.1f}\n"
														
 
															+                f"  - 覆盖率: {coverage_h/expected_h*100:.1f}%\n"
														
 
															+                f"  - 顶部空白: {min_y:.1f}px ({min_y/expected_h*100:.1f}%)\n"
														
 
															+                f"  - 底部空白: {expected_h - max_y:.1f}px ({(expected_h-max_y)/expected_h*100:.1f}%)"
														
 
															+            )
														
 
															-        # 调试日志：输出样本 bbox 坐标信息
														
 
															+        # 可视化验证：保存调试图像，显示上采样mask上的连通域bbox和转换后的原图坐标
														
 
															+        if debug_dir and len(bboxes) > 0:
														
 
															+            try:
														
 
															+                os.makedirs(debug_dir, exist_ok=True)
														
 
															+                
														
 
															+                # 创建可视化图像：上采样mask上的连通域bbox（绿色）
														
 
															+                vis_mask = np.zeros((h, w, 3), dtype=np.uint8)
														
 
															+                vis_mask[:, :, 0] = inv_grid  # 背景用反转的grid
														
 
															+                vis_mask[:, :, 1] = inv_grid
														
 
															+                vis_mask[:, :, 2] = inv_grid
														
 
															+                
														
 
															+                # 在上采样mask上绘制连通域bbox（使用上采样坐标）
														
 
															+                for idx, bbox_orig in enumerate(bboxes[:min(20, len(bboxes))]):  # 只绘制前20个，避免太密集
														
 
															+                    # 反推上采样坐标
														
 
															+                    x_up = int(bbox_orig[0] * scale_w)
														
 
															+                    y_up = int(bbox_orig[1] * scale_h)
														
 
															+                    x2_up = int(bbox_orig[2] * scale_w)
														
 
															+                    y2_up = int(bbox_orig[3] * scale_h)
														
 
															+                    
														
 
															+                    # 确保坐标在范围内
														
 
															+                    x_up = max(0, min(x_up, w - 1))
														
 
															+                    y_up = max(0, min(y_up, h - 1))
														
 
															+                    x2_up = max(0, min(x2_up, w - 1))
														
 
															+                    y2_up = max(0, min(y2_up, h - 1))
														
 
															+                    
														
 
															+                    if x2_up > x_up and y2_up > y_up:
														
 
															+                        cv2.rectangle(vis_mask, (x_up, y_up), (x2_up, y2_up), (0, 255, 0), 2)
														
 
															+                        # 标注单元格索引
														
 
															+                        cv2.putText(vis_mask, str(idx), (x_up + 2, y_up + 15), 
														
 
															+                                   cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
														
 
															+                
														
 
															+                name = f"{debug_prefix}_coordinate_verification_mask.png" if debug_prefix else "coordinate_verification_mask.png"
														
 
															+                path = os.path.join(debug_dir, name)
														
 
															+                cv2.imwrite(path, vis_mask)
														
 
															+                logger.info(f"保存坐标验证图像（上采样mask）: {path}")
														
 
															+                
														
 
															+            except Exception as e:
														
 
															+                logger.warning(f"保存坐标验证图像失败: {e}")
														
 
															+        
														
 
															+        # 详细的坐标转换调试日志
														
 
															         if len(bboxes) > 0:
														
 
															-            logger.debug(f"样本 bbox (原图坐标): 前3个 = {bboxes[:3]}, 后3个 = {bboxes[-3:]}")
														
 
															+            logger.info(
														
 
															+                f"🔍 坐标转换验证:\n"
														
 
															+                f"  - mask尺寸: [{h}, {w}]\n"
														
 
															+                f"  - 原图尺寸: [{orig_h}, {orig_w}]\n"
														
 
															+                f"  - 缩放比例: scale_h={scale_h:.6f}, scale_w={scale_w:.6f}\n"
														
 
															+                f"  - 缩放比例差异: {abs(scale_h - scale_w):.6f}\n"
														
 
															+                f"  - 提取到 {len(bboxes)} 个单元格"
														
 
															+            )
														
 
															+            
														
 
															+            # 记录前几个和后几个单元格的详细坐标转换过程
														
 
															+            sample_indices = [0, 1, 2] + [len(bboxes) - 3, len(bboxes) - 2, len(bboxes) - 1]
														
 
															+            sample_indices = [i for i in sample_indices if 0 <= i < len(bboxes)]
														
 
															+            
														
 
															+            logger.info("🔍 样本单元格坐标转换详情:")
														
 
															+            for idx in sample_indices:
														
 
															+                bbox_orig = bboxes[idx]
														
 
															+                # 反推上采样坐标（用于验证）
														
 
															+                x_up = bbox_orig[0] * scale_w
														
 
															+                y_up = bbox_orig[1] * scale_h
														
 
															+                w_up = (bbox_orig[2] - bbox_orig[0]) * scale_w
														
 
															+                h_up = (bbox_orig[3] - bbox_orig[1]) * scale_h
														
 
															+                
														
 
															+                logger.info(
														
 
															+                    f"  单元格 {idx}: 原图坐标 [{bbox_orig[0]:.1f}, {bbox_orig[1]:.1f}, "
														
 
															+                    f"{bbox_orig[2]:.1f}, {bbox_orig[3]:.1f}] "
														
 
															+                    f"(尺寸: {bbox_orig[2]-bbox_orig[0]:.1f}x{bbox_orig[3]-bbox_orig[1]:.1f}) "
														
 
															+                    f"-> 反推上采样坐标 [{x_up:.1f}, {y_up:.1f}, {x_up+w_up:.1f}, {y_up+h_up:.1f}] "
														
 
															+                    f"(尺寸: {w_up:.1f}x{h_up:.1f})"
														
 
															+                )
														
 
															+            
														
 
															+            # 检查是否有系统性偏移
														
 
															+            if len(bboxes) >= 2:
														
 
															+                first_y = bboxes[0][1]
														
 
															+                last_y = bboxes[-1][3]
														
 
															+                expected_height = last_y - first_y
														
 
															+                actual_image_height = orig_h if orig_h else h / upscale
														
 
															+                logger.info(
														
 
															+                    f"🔍 系统性偏移检查:\n"
														
 
															+                    f"  - 第一个单元格y1: {first_y:.1f}\n"
														
 
															+                    f"  - 最后一个单元格y2: {last_y:.1f}\n"
														
 
															+                    f"  - 单元格覆盖高度: {expected_height:.1f}\n"
														
 
															+                    f"  - 原图实际高度: {actual_image_height:.1f}\n"
														
 
															+                    f"  - 高度差异: {abs(expected_height - actual_image_height):.1f}"
														
 
															+                )
														
 
															+            
														
 
															             logger.debug(f"bbox 坐标范围: x=[{min(b[0] for b in bboxes):.1f}, {max(b[2] for b in bboxes):.1f}], "
														
 
															                         f"y=[{min(b[1] for b in bboxes):.1f}, {max(b[3] for b in bboxes):.1f}]")
														
--- a/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
+++ b/ocr_tools/universal_doc_parser/models/adapters/wired_table/text_filling.py
@@ -7,6 +7,9 @@ from typing import List, Dict, Any, Tuple, Optional
 
															 import bisect
														
 
															 import cv2
														
 
															 import numpy as np
														
 
															+import os
														
 
															+import re
														
 
															+from pathlib import Path
														
 
															 from loguru import logger
														
 
															 from ocr_tools.universal_doc_parser.core.coordinate_utils import CoordinateUtils
														
@@ -25,7 +28,14 @@ class TextFiller:
 
															         """
														
 
															         self.ocr_engine = ocr_engine
														
 
															         self.cell_crop_margin: int = config.get("cell_crop_margin", 2)
														
 
															-        self.ocr_conf_threshold: float = config.get("ocr_conf_threshold", 0.5)
														
 
															+        self.ocr_conf_threshold: float = config.get("ocr_conf_threshold", 0.8)  # 单元格 OCR 置信度阈值
														
 
															+        
														
 
															+        # 跨单元格检测配置参数
														
 
															+        self.overlap_threshold_horizontal: float = config.get("overlap_threshold_horizontal", 0.2)
														
 
															+        self.overlap_threshold_vertical: float = config.get("overlap_threshold_vertical", 0.5)
														
 
															+        self.min_overlap_area: float = config.get("min_overlap_area", 50.0)
														
 
															+        self.center_cell_ratio: float = config.get("center_cell_ratio", 0.5)
														
 
															+        self.other_cell_max_ratio: float = config.get("other_cell_max_ratio", 0.3)
														
 
															     @staticmethod
														
 
															     def calculate_overlap_ratio(ocr_bbox: List[float], cell_bbox: List[float]) -> float:
														
@@ -61,6 +71,7 @@ class TextFiller:
 
															         return inter_area / ocr_area
														
 
															+    
														
 
															     def fill_text_by_center_point(
														
 
															         self,
														
 
															         bboxes: List[List[float]],
														
@@ -190,6 +201,32 @@ class TextFiller:
 
															                 matched_boxes_list[idx] = [box for _, _, _, _, _, box in matched]
														
 
															             else:
														
 
															                 scores[idx] = 0.0 # 无匹配文本，置信度为0
														
 
															+
														
 
															+        # 在外层统一检测 OCR box 是否跨多个单元格或过大（避免重复检测）
														
 
															+        processed_ocr_indices = set()  # 记录已处理的 OCR box 索引，避免重复检测
														
 
															+        for ocr_idx, ocr_item in enumerate(ocr_items):
														
 
															+            if ocr_idx in processed_ocr_indices:
														
 
															+                continue
														
 
															+            
														
 
															+            ocr_bbox = ocr_item["bbox"]
														
 
															+            center_point = (ocr_item["center_x"], ocr_item["center_y"])
														
 
															+            
														
 
															+            # 检测是否跨多个单元格(使用方向感知检测)
														
 
															+            overlapping_cells = self.detect_ocr_box_spanning_cells(
														
 
															+                ocr_bbox, 
														
 
															+                bboxes, 
														
 
															+                overlap_threshold=None,  # 使用配置的方向感知阈值
														
 
															+                center_point=center_point
														
 
															+            )
														
 
															+            
														
 
															+            if len(overlapping_cells) >= 2:
														
 
															+                # OCR box 跨多个单元格，标记所有相关单元格需要二次 OCR
														
 
															+                for cell_idx in overlapping_cells:
														
 
															+                    if cell_idx not in need_reocr_indices:
														
 
															+                        need_reocr_indices.append(cell_idx)
														
 
															+                logger.debug(f"检测到 OCR box 跨 {len(overlapping_cells)} 个单元格[{', '.join(map(str, overlapping_cells))}]: {ocr_item['text'][:20]}...")
														
 
															+                
														
 
															+                processed_ocr_indices.add(ocr_idx)
														
 
															         return texts, scores, matched_boxes_list, need_reocr_indices
														
@@ -221,50 +258,100 @@ class TextFiller:
 
															         y2 = max(c[3] for c in coords_list)
														
 
															         return [float(x1), float(y1), float(x2), float(y2)]
														
 
															-    @staticmethod
														
 
															     def detect_ocr_box_spanning_cells(
														
 
															+        self,
														
 
															         ocr_bbox: List[float],
														
 
															         cell_bboxes: List[List[float]],
														
 
															-        overlap_threshold: float = 0.3
														
 
															+        overlap_threshold: Optional[float] = None,
														
 
															+        center_point: Optional[Tuple[float, float]] = None
														
 
															     ) -> List[int]:
														
 
															         """
														
 
															-        检测 OCR box 是否跨多个单元格
														
 
															+        检测 OCR box 是否跨多个单元格(支持方向感知检测)
														
 
															         Args:
														
 
															             ocr_bbox: OCR box 坐标 [x1, y1, x2, y2]
														
 
															-            cell_bboxes: 单元格坐标列表
														
 
															-            overlap_threshold: 重叠比例阈值（OCR box 与单元格的重叠面积占 OCR box 面积的比例）
														
 
															+            cell_bboxes: 单元格坐标列表 [[x1, y1, x2, y2], ...]
														
 
															+            overlap_threshold: 统一重叠比例阈值(如果为 None,则使用方向感知阈值)
														
 
															+            center_point: OCR box 中心点坐标 (cx, cy),如果提供则用于判断中心点所在的单元格
														
 
															         Returns:
														
 
															-            与 OCR box 重叠的单元格索引列表
														
 
															+            重叠的单元格索引列表(如果 <= 1 个则表示没有跨单元格)
														
 
															         """
														
 
															         if not ocr_bbox or len(ocr_bbox) < 4:
														
 
															             return []
														
 
															-        overlapping_cells = []
														
 
															-        ocr_area = (ocr_bbox[2] - ocr_bbox[0]) * (ocr_bbox[3] - ocr_bbox[1])
														
 
															+        ocr_x1, ocr_y1, ocr_x2, ocr_y2 = ocr_bbox
														
 
															+        ocr_area = (ocr_x2 - ocr_x1) * (ocr_y2 - ocr_y1)
														
 
															+        ocr_width = ocr_x2 - ocr_x1
														
 
															+        ocr_height = ocr_y2 - ocr_y1
														
 
															         if ocr_area <= 0:
														
 
															             return []
														
 
															+        # 找到中心点所在的单元格索引(如果提供了中心点坐标)
														
 
															+        center_cell_idx = None
														
 
															+        if center_point is not None:
														
 
															+            cx, cy = center_point
														
 
															+            for idx, cell_bbox in enumerate(cell_bboxes):
														
 
															+                if not cell_bbox or len(cell_bbox) < 4:
														
 
															+                    continue
														
 
															+                cell_x1, cell_y1, cell_x2, cell_y2 = cell_bbox
														
 
															+                if cell_x1 <= cx <= cell_x2 and cell_y1 <= cy <= cell_y2:
														
 
															+                    center_cell_idx = idx
														
 
															+                    break
														
 
															+        
														
 
															+        cell_overlaps: List[Tuple[int, float]] = []
														
 
															+        
														
 
															         for idx, cell_bbox in enumerate(cell_bboxes):
														
 
															             if not cell_bbox or len(cell_bbox) < 4:
														
 
															                 continue
														
 
															+            cell_x1, cell_y1, cell_x2, cell_y2 = cell_bbox
														
 
															+            
														
 
															             # 计算交集
														
 
															-            inter_x1 = max(ocr_bbox[0], cell_bbox[0])
														
 
															-            inter_y1 = max(ocr_bbox[1], cell_bbox[1])
														
 
															-            inter_x2 = min(ocr_bbox[2], cell_bbox[2])
														
 
															-            inter_y2 = min(ocr_bbox[3], cell_bbox[3])
														
 
															+            inter_x1 = max(ocr_x1, cell_x1)
														
 
															+            inter_y1 = max(ocr_y1, cell_y1)
														
 
															+            inter_x2 = min(ocr_x2, cell_x2)
														
 
															+            inter_y2 = min(ocr_y2, cell_y2)
														
 
															             if inter_x2 > inter_x1 and inter_y2 > inter_y1:
														
 
															                 inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
														
 
															                 overlap_ratio = inter_area / ocr_area
														
 
															-                if overlap_ratio > overlap_threshold:
														
 
															-                    overlapping_cells.append(idx)
														
 
															+                # 方向感知检测
														
 
															+                if overlap_threshold is None:
														
 
															+                    # 计算水平和垂直重叠比例
														
 
															+                    h_overlap_ratio = (inter_x2 - inter_x1) / ocr_width if ocr_width > 0 else 0
														
 
															+                    v_overlap_ratio = (inter_y2 - inter_y1) / ocr_height if ocr_height > 0 else 0
														
 
															+                    
														
 
															+                    # 垂直方向使用更严格的阈值,水平方向使用较宽松的阈值
														
 
															+                    # 同时检查重叠面积是否超过最小阈值
														
 
															+                    is_overlapping = (
														
 
															+                        (h_overlap_ratio > self.overlap_threshold_horizontal and 
														
 
															+                         v_overlap_ratio > self.overlap_threshold_vertical) and
														
 
															+                        inter_area >= self.min_overlap_area
														
 
															+                    )
														
 
															+                else:
														
 
															+                    # 使用统一阈值
														
 
															+                    is_overlapping = overlap_ratio > overlap_threshold
														
 
															+                
														
 
															+                if is_overlapping:
														
 
															+                    cell_overlaps.append((idx, overlap_ratio))
														
 
															+        
														
 
															+        # 如果中心点在某个单元格内,且该单元格的重叠比例符合阈值,且没有其他单元格达到次要阈值,则不标记为跨单元格
														
 
															+        if center_cell_idx is not None and cell_overlaps:
														
 
															+            # 找到中心点所在单元格的重叠比例
														
 
															+            center_overlap = next((overlap for idx, overlap in cell_overlaps if idx == center_cell_idx), None)
														
 
															+            if center_overlap is not None and center_overlap >= self.center_cell_ratio:
														
 
															+                # 检查是否有其他单元格的重叠比例也超过次要阈值
														
 
															+                other_high_overlaps = [idx for idx, overlap in cell_overlaps 
														
 
															+                                      if idx != center_cell_idx and overlap >= self.other_cell_max_ratio]
														
 
															+                if not other_high_overlaps:
														
 
															+                    # 中心点所在单元格占主导,不应该标记为跨单元格
														
 
															+                    return []
														
 
															-        return overlapping_cells
														
 
															+        # 返回所有满足阈值的单元格索引
														
 
															+        return [idx for idx, _ in cell_overlaps]
														
 
															     def second_pass_ocr_fill(
														
 
															         self,
														
@@ -274,6 +361,7 @@ class TextFiller:
 
															         scores: Optional[List[float]] = None,
														
 
															         need_reocr_indices: Optional[List[int]] = None,
														
 
															         force_all: bool = False,
														
 
															+        output_dir: Optional[str] = None,
														
 
															     ) -> List[str]:
														
 
															         """
														
 
															         二次OCR统一封装：
														
@@ -282,6 +370,7 @@ class TextFiller:
 
															         - 对竖排单元格（高宽比大）进行旋转后识别
														
 
															         - 对 OCR 误合并的单元格进行重识别（OCR box 跨多个单元格或过大）
														
 
															         - [New] force_all=True: 强制对所有单元格进行裁剪识别 (Full-page OCR 作为 fallback)
														
 
															+        - [New] output_dir: 输出目录，如果提供则保存单元格OCR图片用于调试
														
 
															         Args:
														
 
															             table_image: 表格图像
														
@@ -290,6 +379,7 @@ class TextFiller:
 
															             scores: 当前置信度列表
														
 
															             need_reocr_indices: 需要二次 OCR 的单元格索引列表（OCR 误合并检测结果）
														
 
															             force_all: 是否强制对所有单元格进行 OCR (Default: False)
														
 
															+            output_dir: 输出目录，如果提供则保存单元格OCR图片到 {output_dir}/tablecell_ocr/ 目录
														
 
															         """
														
 
															         try:
														
 
															             if not self.ocr_engine:
														
@@ -303,6 +393,12 @@ class TextFiller:
 
															             if need_reocr_indices is None:
														
 
															                 need_reocr_indices = []
														
 
															+            # 如果提供了输出目录，创建 tablecell_ocr 子目录
														
 
															+            cell_ocr_dir = None
														
 
															+            if output_dir:
														
 
															+                cell_ocr_dir = os.path.join(output_dir, "tablecell_ocr")
														
 
															+                os.makedirs(cell_ocr_dir, exist_ok=True)
														
 
															+
														
 
															             h_img, w_img = table_image.shape[:2]
														
 
															             margin = self.cell_crop_margin
														
@@ -369,6 +465,7 @@ class TextFiller:
 
															                 if ch < 64 or cw < 64:
														
 
															                     cell_img = cv2.resize(cell_img, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
														
 
															                     ch, cw = cell_img.shape[:2]
														
 
															+                    logger.debug(f"单元格({texts[i] if i < len(texts) and len(texts[i]) else 'empty'}) {i} 裁剪后图像过小，放大至 {cw}x{ch} 像素")
														
 
															                 # 竖排文本旋转为横排
														
 
															                 if ch > cw * 2.0:
														
@@ -381,7 +478,7 @@ class TextFiller:
 
															                 return texts
														
 
															             logger.info(f"触发二次OCR: {len(crop_list)} 个单元格 (总数 {len(texts)})")
														
 
															-
														
 
															+            
														
 
															             # 先批量检测文本块，再批量识别（提高效率）
														
 
															             # Step 1: 批量检测
														
 
															             det_results = []
														
@@ -426,9 +523,6 @@ class TextFiller:
 
															                     if x2 > x1 and y2 > y1:
														
 
															                         cropped = cell_img[y1:y2, x1:x2]
														
 
															                         ch, cw = cropped.shape[:2]
														
 
															-                        # 小图放大
														
 
															-                        if ch < 64 or cw < 64:
														
 
															-                            cropped = cv2.resize(cropped, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
														
 
															                         if cropped.size > 0:
														
 
															                             rec_img_list.append(cropped)
														
 
															                             rec_indices.append((cell_idx, box_idx))
														
@@ -503,10 +597,42 @@ class TextFiller:
 
															             n = min(len(results) if isinstance(results, list) else 0, len(crop_list), len(crop_indices))
														
 
															             conf_th = self.ocr_conf_threshold
														
 
															+            # 辅助函数：清理文件名中的非法字符
														
 
															+            def sanitize_filename(text: str, max_length: int = 50) -> str:
														
 
															+                """清理文件名，移除非法字符并限制长度"""
														
 
															+                if not text:
														
 
															+                    return "empty"
														
 
															+                # 替换或删除文件名中的非法字符
														
 
															+                # Windows/Linux 文件名非法字符: / \ : * ? " < > |
														
 
															+                illegal_chars = r'[/\\:*?"<>|]'
														
 
															+                sanitized = re.sub(illegal_chars, '_', text)
														
 
															+                # 限制长度
														
 
															+                if len(sanitized) > max_length:
														
 
															+                    sanitized = sanitized[:max_length]
														
 
															+                # 移除首尾空格和下划线
														
 
															+                sanitized = sanitized.strip('_').strip()
														
 
															+                return sanitized if sanitized else "empty"
														
 
															+
														
 
															             for k in range(n):
														
 
															                 text_k, score_k = _parse_item(results[k])
														
 
															+                cell_idx = crop_indices[k]
														
 
															+                cell_img = crop_list[k]
														
 
															+                
														
 
															+                # 保存单元格OCR图片用于调试
														
 
															+                if cell_ocr_dir and cell_img is not None:
														
 
															+                    try:
														
 
															+                        # 生成文件名：序号_识别内容
														
 
															+                        sanitized_text = sanitize_filename(text_k)
														
 
															+                        filename = f"{cell_idx:03d}_{sanitized_text}.png"
														
 
															+                        filepath = os.path.join(cell_ocr_dir, filename)
														
 
															+                        cv2.imwrite(filepath, cell_img)
														
 
															+                    except Exception as e:
														
 
															+                        logger.warning(f"保存单元格OCR图片失败 (cell {cell_idx}): {e}")
														
 
															+                
														
 
															                 if text_k and score_k >= conf_th:
														
 
															-                    texts[crop_indices[k]] = text_k
														
 
															+                    texts[cell_idx] = text_k
														
 
															+                elif text_k:
														
 
															+                    logger.debug(f"单元格 {cell_idx} 二次OCR结果置信度({score_k:.2f})低于阈值({conf_th}): (文本: '{text_k[:30]}...')")
														
 
															         except Exception as e:
														
 
															             logger.warning(f"二次OCR失败: {e}")
Autors	SHA1 Ziņojums	Datums
zhch158_admin	64652051e4 fix: 更新示例输入输出路径，修正注释以提高代码可读性	1 dienu atpakaļ
zhch158_admin	ca720abd31 fix: 增强UNet预处理的缩放因子验证，优化预测结果的尺寸一致性检查，记录详细的调试信息以确保坐标转换的准确性	1 dienu atpakaļ
zhch158_admin	1fbcf06f4a fix: 增强文本填充器的OCR检测能力，支持跨单元格检测和输出调试图像，优化重叠检测逻辑	1 dienu atpakaļ
zhch158_admin	bb0acb2afc fix: 增强网格结构恢复中的坐标转换精度，添加调试信息以验证缩放比例和单元格覆盖情况	1 dienu atpakaļ
zhch158_admin	3cf3aa5085 fix: 调整表格处理中的padding策略，优化边缘保护与噪声控制的平衡	1 dienu atpakaļ
zhch158_admin	2f5c74136e fix: 优化 crop_region 方法中的代码格式，提升可读性	1 dienu atpakaļ
zhch158_admin	0102386803 fix: Update OCR confidence threshold in bank_statement_wired_unet.yaml to improve cell recognition accuracy	1 dienu atpakaļ