|
|
@@ -387,8 +387,23 @@ def enhanced_predict_single_table_recognition_res(
|
|
|
|
|
|
# 🎯 Step 4: **关键改进** - OCR引导的单元格补全
|
|
|
if (use_wired_table_cells_trans_to_html or use_wireless_table_cells_trans_to_html) and use_ocr_results_with_table_cells:
|
|
|
+ # ✅ 修复: 确保 general_ocr_pipeline 被初始化
|
|
|
+ if self.general_ocr_pipeline is None:
|
|
|
+ if hasattr(self, 'general_ocr_config_bak') and self.general_ocr_config_bak is not None:
|
|
|
+ print("🔧 [Adapter] Initializing general_ocr_pipeline from backup config")
|
|
|
+ self.general_ocr_pipeline = self.create_pipeline(self.general_ocr_config_bak)
|
|
|
+ else:
|
|
|
+ print("⚠️ [Adapter] No OCR pipeline available, falling back to original implementation")
|
|
|
+ return _original_predict_single(
|
|
|
+ self, image_array, overall_ocr_res, table_box,
|
|
|
+ use_e2e_wired_table_rec_model, use_e2e_wireless_table_rec_model,
|
|
|
+ use_wired_table_cells_trans_to_html, use_wireless_table_cells_trans_to_html,
|
|
|
+ use_ocr_results_with_table_cells, flag_find_nei_text
|
|
|
+ )
|
|
|
+
|
|
|
# ✅ 对每个单元格做OCR(使用裁剪前的坐标)
|
|
|
cells_texts_list = self.gen_ocr_with_table_cells(image_array, table_cells_result)
|
|
|
+
|
|
|
# ✅ 补全缺失的单元格
|
|
|
completed_cells, cells_texts_list = infer_missing_cells_from_ocr(
|
|
|
detected_cells=table_cells_result_orig,
|
|
|
@@ -401,7 +416,7 @@ def enhanced_predict_single_table_recognition_res(
|
|
|
# ✅ 生成HTML骨架(使用转换后的原图坐标)
|
|
|
html_skeleton = build_robust_html_from_cells(completed_cells)
|
|
|
|
|
|
- # ✅ 填充内容(使用单元格中心点坐标和单元格OCR文本)
|
|
|
+ # ✅ 填充内容(使用单元格bbox和单元格OCR文本)
|
|
|
pred_html = fill_html_with_ocr_by_bbox(
|
|
|
html_skeleton,
|
|
|
completed_cells, # ✅ 单元格bbox
|
|
|
@@ -418,9 +433,7 @@ def enhanced_predict_single_table_recognition_res(
|
|
|
res["neighbor_texts"] = ""
|
|
|
return res
|
|
|
else:
|
|
|
- # 回退到原始实现
|
|
|
- print(f"Fallback to original implementation: {table_cls_result}: use_wired_table_cells_trans_to_html={use_wired_table_cells_trans_to_html}, use_wireless_table_cells_trans_to_html={use_wireless_table_cells_trans_to_html}, use_ocr_results_with_table_cells={use_ocr_results_with_table_cells}")
|
|
|
-
|
|
|
+ print(f"⚠️ Fallback to original implementation: {table_cls_result}")
|
|
|
return _original_predict_single(
|
|
|
self, image_array, overall_ocr_res, table_box,
|
|
|
use_e2e_wired_table_rec_model, use_e2e_wireless_table_rec_model,
|