@workspace 如何针对我特定的应用,采用动态适配器的方式实现个性化的get_table_recognition_res,而不改动原有代码实现
针对你的特定应用需求,可以通过动态适配器模式来实现个性化的 get_table_recognition_res,而不修改原有代码。以下是几种实现方案:
方案1(猴子补丁适配器):创建一个适配器模块,在运行时替换原函数:
"""表格识别个性化适配器"""
import importlib
from typing import Any, Dict, List
from pathlib import Path
# Reference to the original get_table_recognition_res. It stays None until
# apply_table_recognition_adapter() stores the function it is about to replace.
_original_get_table_recognition_res = None

def enhanced_get_table_recognition_res(
    table_box: list,
    table_structure_result: list,
    table_cells_result: list,
    overall_ocr_res: Any,
    table_ocr_pred: dict,
    cells_texts_list: list,
    use_table_cells_ocr_results: bool,
    use_table_cells_split_ocr: bool,
) -> Any:
    """Build a table recognition result whose HTML is generated with BeautifulSoup.

    Drop-in replacement for PaddleX's ``get_table_recognition_res``: it takes
    the same eight arguments and returns a ``SingleTableRecognitionResult``.
    The pre-processing steps reuse the PaddleX helpers; only the final HTML
    generation is delegated to ``generate_table_with_beautifulsoup_enhanced``.

    If ``bs4`` cannot be imported the call is forwarded to the original
    PaddleX implementation.

    Raises:
        RuntimeError: ``bs4`` is missing and no original implementation
            distinct from this function can be located.
    """
    try:
        from bs4 import BeautifulSoup  # noqa: F401 - availability probe only
    except ImportError:
        print("⚠️ BeautifulSoup not available, falling back to original method")
        fallback = _original_get_table_recognition_res
        if fallback is None:
            # The adapter may have been installed by assigning this function
            # directly (without apply_table_recognition_adapter), in which
            # case the saved reference was never set. The post-processing
            # module may still expose the untouched original.
            from paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2 import (
                get_table_recognition_res as fallback,
            )
        if fallback is None or fallback is enhanced_get_table_recognition_res:
            # Forwarding to ourselves would recurse forever.
            raise RuntimeError(
                "BeautifulSoup is not installed and the original "
                "get_table_recognition_res could not be located"
            )
        return fallback(
            table_box, table_structure_result, table_cells_result,
            overall_ocr_res, table_ocr_pred, cells_texts_list,
            use_table_cells_ocr_results, use_table_cells_split_ocr
        )

    # Helper functions reused from PaddleX.
    from paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2 import (
        convert_to_four_point_coordinates,
        convert_table_structure_pred_bbox,
        sort_table_cells_boxes,
        find_row_start_index,
        map_and_get_max,
        match_table_and_ocr,
        SingleTableRecognitionResult
    )
    from paddlex.inference.pipelines.layout_parsing.utils import get_sub_regions_ocr_res
    import numpy as np

    # Base processing (same steps as the code this function replaces).
    table_cells_result = convert_to_four_point_coordinates(table_cells_result)
    table_box = np.array([table_box])

    # The explicit == True / == False comparisons are kept as written so that
    # non-bool flag values are treated exactly as before.
    if not (use_table_cells_ocr_results == True and use_table_cells_split_ocr == True):  # noqa: E712
        table_ocr_pred = get_sub_regions_ocr_res(overall_ocr_res, table_box)

    crop_start_point = [table_box[0][0], table_box[0][1]]
    img_shape = overall_ocr_res["doc_preprocessor_res"]["output_img"].shape[0:2]

    # Empty table: no cells or no OCR boxes, so emit the raw structure tokens.
    if len(table_cells_result) == 0 or len(table_ocr_pred["rec_boxes"]) == 0:
        pred_html = " ".join(table_structure_result)
        if len(table_cells_result) != 0:
            table_cells_result = convert_table_structure_pred_bbox(
                table_cells_result, crop_start_point, img_shape
            )
        single_img_res = {
            "cell_box_list": table_cells_result,
            "table_ocr_pred": table_ocr_pred,
            "pred_html": pred_html,
        }
        return SingleTableRecognitionResult(single_img_res)

    # Convert cell coordinates.
    table_cells_result = convert_table_structure_pred_bbox(
        table_cells_result, crop_start_point, img_shape
    )

    # Choose which OCR boxes/texts are matched against the cells.
    if use_table_cells_ocr_results == True and use_table_cells_split_ocr == False:  # noqa: E712
        ocr_dt_boxes = table_cells_result
        ocr_texts_res = cells_texts_list
    else:
        ocr_dt_boxes = table_ocr_pred["rec_boxes"]
        ocr_texts_res = table_ocr_pred["rec_texts"]

    # Sort cells and compute row boundaries.
    table_cells_result, table_cells_flag = sort_table_cells_boxes(table_cells_result)
    row_start_index = find_row_start_index(table_structure_result)
    table_cells_flag = map_and_get_max(table_cells_flag, row_start_index)
    table_cells_flag.append(len(table_cells_result))
    row_start_index.append(len(table_cells_result))

    # Match OCR results to cells.
    # NOTE(review): table_cells_flag is passed for both boundary arguments and
    # row_start_index is not used afterwards - confirm this matches upstream.
    matched_index = match_table_and_ocr(
        table_cells_result, ocr_dt_boxes, table_cells_flag, table_cells_flag
    )

    # Key difference from the original: HTML is generated with BeautifulSoup.
    pred_html = generate_table_with_beautifulsoup_enhanced(
        table_cells_result, ocr_texts_res, matched_index, table_cells_flag
    )
    single_img_res = {
        "cell_box_list": table_cells_result,
        "table_ocr_pred": table_ocr_pred,
        "pred_html": pred_html,
    }
    return SingleTableRecognitionResult(single_img_res)
def generate_table_with_beautifulsoup_enhanced(
    table_cells_result: list,
    ocr_texts_res: list,
    matched_index: dict,
    table_cells_flag: list
) -> str:
    """Render the matched OCR texts as an HTML table using BeautifulSoup.

    ``table_cells_flag`` holds row boundaries: row ``r`` gets
    ``table_cells_flag[r + 1] - table_cells_flag[r]`` ``<td>`` cells. For each
    cell, the OCR indices found at ``matched_index[r][c]`` are looked up in
    ``ocr_texts_res``, stripped, filtered for emptiness and merged with
    ``smart_merge_cell_content``. Cells without a match stay empty.

    ``table_cells_result`` is accepted for interface compatibility and is not
    read here.

    Returns:
        The serialized document. When ``bs4`` is unavailable a placeholder
        table is returned; when there are no rows an empty table is returned.
    """
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        return "<html><body><table><tr><td>BeautifulSoup not available</td></tr></table></body></html>"

    # Number of rows is one less than the number of boundaries.
    row_total = len(table_cells_flag) - 1
    if row_total <= 0:
        return "<html><body><table></table></body></html>"

    document = BeautifulSoup("<html><body><table></table></body></html>", 'html.parser')
    table_tag = document.find('table')
    text_count = len(ocr_texts_res)

    for r in range(row_total):
        row_tag = document.new_tag('tr')
        table_tag.append(row_tag)

        cells_in_row = table_cells_flag[r + 1] - table_cells_flag[r]
        for c in range(cells_in_row):
            cell_tag = document.new_tag('td')
            row_tag.append(cell_tag)

            # Nothing matched for this cell: leave the <td> empty.
            if r >= len(matched_index) or c not in matched_index[r]:
                continue

            stripped_texts = (
                ocr_texts_res[text_idx].strip()
                for text_idx in matched_index[r][c]
                if text_idx < text_count
            )
            fragments = [text for text in stripped_texts if text]

            if fragments:
                # Several fragments may be one wrapped phrase
                # (e.g. "扫二维码付" followed by "款").
                cell_tag.string = smart_merge_cell_content(fragments)

    return str(document)
def smart_merge_cell_content(
    content_parts: List[str],
    *,
    max_short_length: int = 6,
    join_separator: str = "",
    default_separator: str = " ",
) -> str:
    """Merge the OCR text fragments of one table cell into a single string.

    Rules, applied in order:

    1. A single fragment is returned unchanged.
    2. Exactly two fragments that are each at most ``max_short_length``
       characters long are treated as one wrapped phrase and joined with
       ``join_separator`` (e.g. ``["扫二维码付", "款"]`` -> ``"扫二维码付款"``).
    3. Anything else is joined with ``default_separator``.

    Args:
        content_parts: Text fragments in reading order.
        max_short_length: Longest fragment length still considered "short"
            for rule 2.
        join_separator: Separator used for rule 2.
        default_separator: Separator used for rule 3.

    Returns:
        The merged text; an empty list yields ``""``.

    NOTE(review): rule 2 is length-based only, so two short Latin words such
    as ``["Total", "Price"]`` are also concatenated without a space.
    """
    if len(content_parts) == 1:
        return content_parts[0]

    looks_wrapped = len(content_parts) == 2 and all(
        len(part) <= max_short_length for part in content_parts
    )
    if looks_wrapped:
        return join_separator.join(content_parts)

    return default_separator.join(content_parts)
# Module that defines get_table_recognition_res.
_POST_PROCESSING_MODULE_NAME = (
    "paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2"
)
# Module that imports the function by name and therefore holds its own binding.
_PIPELINE_MODULE_NAME = "paddlex.inference.pipelines.table_recognition.pipeline_v2"
_PATCHED_ATTRIBUTE = "get_table_recognition_res"


def _import_pipeline_module():
    """Return the pipeline_v2 module, or None when it cannot be imported."""
    try:
        return importlib.import_module(_PIPELINE_MODULE_NAME)
    except ImportError:
        return None


def apply_table_recognition_adapter():
    """Install ``enhanced_get_table_recognition_res`` in place of the original.

    The function is replaced in the post-processing module and, when
    available, in ``pipeline_v2`` as well: a ``from ... import`` in that
    module creates a separate name binding that a patch on the defining
    module alone does not affect.

    Calling this more than once is safe: the saved original is only recorded
    while the module still exposes a function other than the enhanced one.

    Returns:
        True when the adapter is installed, False when anything failed
        (the error is printed).
    """
    global _original_get_table_recognition_res
    try:
        # Import the target module.
        module = importlib.import_module(_POST_PROCESSING_MODULE_NAME)
        current = module.get_table_recognition_res

        if current is not enhanced_get_table_recognition_res:
            # Save the original, then swap in the enhanced version.
            _original_get_table_recognition_res = current
            module.get_table_recognition_res = enhanced_get_table_recognition_res

        # Rebind the pipeline module's own reference too (best effort).
        pipeline_module = _import_pipeline_module()
        if pipeline_module is not None and hasattr(pipeline_module, _PATCHED_ATTRIBUTE):
            setattr(pipeline_module, _PATCHED_ATTRIBUTE, enhanced_get_table_recognition_res)

        print("✅ Table recognition adapter applied successfully")
        return True
    except Exception as e:
        print(f"❌ Failed to apply table recognition adapter: {e}")
        return False


def restore_original_function():
    """Put the original ``get_table_recognition_res`` back.

    Undoes both replacements made by ``apply_table_recognition_adapter``.
    The ``pipeline_v2`` binding is only reset when it currently points at the
    enhanced function, so an unrelated replacement is left alone.

    Returns:
        True when the original was restored, False when no original was ever
        saved or restoring failed (the error is printed).
    """
    global _original_get_table_recognition_res
    if _original_get_table_recognition_res is None:
        return False
    try:
        module = importlib.import_module(_POST_PROCESSING_MODULE_NAME)
        module.get_table_recognition_res = _original_get_table_recognition_res

        pipeline_module = _import_pipeline_module()
        if (
            pipeline_module is not None
            and getattr(pipeline_module, _PATCHED_ATTRIBUTE, None)
            is enhanced_get_table_recognition_res
        ):
            setattr(pipeline_module, _PATCHED_ATTRIBUTE, _original_get_table_recognition_res)

        print("✅ Original function restored")
        return True
    except Exception as e:
        print(f"❌ Failed to restore original function: {e}")
        return False
修改你的 table_recognition_v2_single_process.py 来使用适配器:
"""仅运行 table_recognition_v2 管线,并将表格HTML转为Markdown保存"""
# ...existing imports...
# 🎯 新增:导入适配器
from adapters.table_recognition_adapter import apply_table_recognition_adapter, restore_original_function
def process_images_with_table_pipeline(
    image_paths: List[str],
    pipeline_cfg: str = "./my_config/table_recognition_v2.yaml",
    device: str = "gpu:0",
    output_dir: str = "./output",
    normalize_numbers: bool = True,
    use_enhanced_adapter: bool = True  # new: toggles the enhanced adapter
) -> List[Dict[str, Any]]:
    """Run the table_recognition_v2 pipeline over ``image_paths``.

    The output directory is created if needed. When ``use_enhanced_adapter``
    is true the table recognition adapter is applied before the pipeline is
    created, and the original function is restored when processing ends or
    when pipeline creation fails.

    Per-image result handling is elided in this excerpt
    ("...existing ..." placeholders).

    Returns:
        The collected results, or an empty list when the pipeline could not
        be initialized.
    """
    output_path = Path(output_dir).resolve()
    output_path.mkdir(parents=True, exist_ok=True)

    # Apply the adapter.
    adapter_applied = False
    if use_enhanced_adapter:
        adapter_applied = apply_table_recognition_adapter()
        if adapter_applied:
            print("🎯 Enhanced table recognition adapter activated")
        else:
            print("⚠️ Failed to apply adapter, using original implementation")

    print(f"Initializing pipeline '{pipeline_cfg}' on device '{device}'...")
    try:
        os.environ['PYTHONWARNINGS'] = 'ignore::UserWarning'
        pipeline = create_pipeline(pipeline_cfg, device=device)
        print(f"Pipeline initialized successfully on {device}")
    except Exception as e:
        print(f"Failed to initialize pipeline: {e}", file=sys.stderr)
        if adapter_applied:
            restore_original_function()
        return []

    try:
        # ...existing processing logic...
        results_all: List[Dict[str, Any]] = []
        total = len(image_paths)
        print(f"Processing {total} images with table_recognition_v2")
        print(f"🔧 数字标准化: {'启用' if normalize_numbers else '禁用'}")
        print(f"🎯 增强适配器: {'启用' if adapter_applied else '禁用'}")
        with tqdm(total=total, desc="Processing images", unit="img",
                  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]') as pbar:
            for img_path in image_paths:
                start = time.time()
                try:
                    outputs = pipeline.predict(
                        img_path,
                        use_doc_orientation_classify=True,
                        use_doc_unwarping=False,
                        use_layout_detection=True,
                        use_ocr_results_with_table_cells=True,
                        use_table_orientation_classify=True,
                        use_wired_table_cells_trans_to_html=True,
                        # Note: not needed when the adapter is active.
                        # use_table_cells_split_ocr=False,
                    )
                    # ...existing result processing...
                except Exception as e:
                    # ...existing error handling...
                    pass
        return results_all
    finally:
        # Cleanup: restore the original function. The confirmation is printed
        # only when the restore actually succeeded, so a failed restore is not
        # reported as a success.
        if adapter_applied and restore_original_function():
            print("🔄 Original function restored")
def main():
    """Command-line entry point (excerpt).

    Adds a ``--no-adapter`` switch and forwards its negation to
    ``process_images_with_table_pipeline`` as ``use_enhanced_adapter``.
    Argument definitions, input collection and result handling are elided
    ("...existing ..." placeholders); ``input_files`` and
    ``normalize_numbers`` are presumably defined there - verify against the
    full script.
    """
    parser = argparse.ArgumentParser(description="table_recognition_v2 单管线运行(输出Markdown表格)")
    # ...existing arguments...
    parser.add_argument("--no-adapter", action="store_true", help="禁用增强适配器")
    args = parser.parse_args()
    # The adapter is on by default; the flag switches it off.
    use_enhanced_adapter = not args.no_adapter
    # ...existing input processing...
    results = process_images_with_table_pipeline(
        input_files,
        args.pipeline,
        args.device,
        args.output_dir,
        normalize_numbers=normalize_numbers,
        use_enhanced_adapter=use_enhanced_adapter  # forward the switch
    )
    # ...existing result processing...
方案2(配置驱动适配器):创建一个更灵活的配置驱动适配器:
"""配置驱动的表格识别适配器"""
import yaml
from typing import Dict, Any, Callable
from pathlib import Path
class TableRecognitionConfig:
    """Configuration for the table recognition adapter.

    ``self.config`` is a nested dict with the sections
    ``cell_content_merge``, ``table_generation`` and ``structure_repair``.
    Values from an optional YAML file are merged over the built-in defaults.
    """

    def __init__(self, config_path: str = None):
        self.config = self.load_config(config_path)

    def load_config(self, config_path: str = None) -> Dict[str, Any]:
        """Return the defaults, overlaid with the YAML file at ``config_path``.

        The overlay is recursive: a file that sets only one key of a section
        keeps the remaining defaults of that section. A missing path, a
        non-existent file or an empty file all yield the plain defaults.

        Raises:
            ValueError: the YAML document is not a mapping.
        """
        default_config = {
            "cell_content_merge": {
                "strategy": "smart",  # smart, simple, preserve_newlines
                "max_short_length": 6,
                "join_separator": ""
            },
            "table_generation": {
                "method": "beautifulsoup",  # beautifulsoup, original
                "fallback_to_original": True
            },
            "structure_repair": {
                "auto_fix_column_mismatch": True,
                "use_cells_skeleton_fallback": True
            }
        }
        if config_path and Path(config_path).exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                custom_config = yaml.safe_load(f)
            if custom_config is None:
                # Empty file: nothing to overlay.
                return default_config
            if not isinstance(custom_config, dict):
                raise ValueError(
                    f"Config file {config_path!r} must contain a mapping, "
                    f"got {type(custom_config).__name__}"
                )
            self._deep_merge(default_config, custom_config)
        return default_config

    @staticmethod
    def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively copy ``override`` into ``base`` (in place) and return it.

        Nested dicts are merged key by key; any other value replaces the
        value in ``base``.
        """
        for key, value in override.items():
            if isinstance(value, dict) and isinstance(base.get(key), dict):
                TableRecognitionConfig._deep_merge(base[key], value)
            else:
                base[key] = value
        return base
class AdaptiveTableRecognition:
    """Wraps the original recognition function with config-driven behavior.

    ``config.config["table_generation"]`` selects the processing ``method``
    and whether a failure of the BeautifulSoup path falls back to the
    original function.
    """

    def __init__(self, config: "TableRecognitionConfig"):
        self.config = config
        self._original_function = None

    def enhance_get_table_recognition_res(self, original_func: Callable) -> Callable:
        """Remember ``original_func`` and return a wrapper that dispatches by config."""
        from functools import wraps

        self._original_function = original_func

        @wraps(original_func)
        def enhanced_wrapper(*args, **kwargs):
            return self._process_with_config(*args, **kwargs)

        return enhanced_wrapper

    def _process_with_config(self, *args, **kwargs):
        """Run the configured method.

        With ``method == "beautifulsoup"`` the enhanced path is tried first;
        on any exception the original function is called instead when
        ``fallback_to_original`` is true, otherwise the exception propagates.
        Any other method value calls the original function directly.
        """
        method = self.config.config["table_generation"]["method"]
        if method == "beautifulsoup":
            try:
                return self._beautifulsoup_processing(*args, **kwargs)
            except Exception as e:
                if self.config.config["table_generation"]["fallback_to_original"]:
                    print(f"⚠️ Beautifulsoup processing failed, falling back: {e}")
                    return self._original_function(*args, **kwargs)
                else:
                    raise
        else:
            return self._original_function(*args, **kwargs)

    def _beautifulsoup_processing(self, *args, **kwargs):
        """Extension point for the BeautifulSoup-based implementation.

        Raises NotImplementedError until overridden, so that an unimplemented
        hook triggers the configured fallback instead of handing ``None`` to
        the pipeline as a recognition result.
        """
        # Implement your enhanced logic here.
        raise NotImplementedError(
            "_beautifulsoup_processing is not implemented"
        )
# Usage example
def apply_config_driven_adapter(config_path: str = None):
    """Install a config-driven wrapper around ``get_table_recognition_res``.

    Builds a ``TableRecognitionConfig`` from ``config_path`` and an
    ``AdaptiveTableRecognition`` from it, then replaces the function in the
    post-processing module. The name is also rebound in ``pipeline_v2`` when
    that module exposes it, because a ``from ... import`` there creates a
    separate binding that the first replacement does not reach.

    Returns:
        True on success, False when patching failed (the error is printed).
    """
    # Imported locally so the function does not depend on a module-level
    # import of importlib.
    import importlib

    config = TableRecognitionConfig(config_path)
    adapter = AdaptiveTableRecognition(config)
    try:
        module = importlib.import_module(
            "paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2"
        )
        original_func = module.get_table_recognition_res
        enhanced_func = adapter.enhance_get_table_recognition_res(original_func)
        module.get_table_recognition_res = enhanced_func

        # Best effort: also rebind the pipeline module's own reference.
        try:
            pipeline_module = importlib.import_module(
                "paddlex.inference.pipelines.table_recognition.pipeline_v2"
            )
        except ImportError:
            pipeline_module = None
        if pipeline_module is not None and hasattr(pipeline_module, "get_table_recognition_res"):
            pipeline_module.get_table_recognition_res = enhanced_func

        return True
    except Exception as e:
        print(f"Failed to apply config-driven adapter: {e}")
        return False
"""装饰器适配器"""
from functools import wraps
import importlib
def table_recognition_enhancer(func):
    """Decorate a table recognition function.

    The wrapper forces ``use_table_cells_split_ocr`` to False (to keep
    wrapped multi-line cell text together) and passes the resulting
    ``pred_html`` through ``post_process_html``.

    The flag is located through the wrapped function's signature, so it is
    overridden whether the caller passes it positionally or by keyword.
    When the signature is unavailable or does not name the parameter, the
    flag is injected as a keyword argument.
    """
    import inspect

    split_param = 'use_table_cells_split_ocr'
    try:
        func_signature = inspect.signature(func)
    except (TypeError, ValueError):
        func_signature = None
    names_split_param = (
        func_signature is not None and split_param in func_signature.parameters
    )

    @wraps(func)
    def wrapper(*args, **kwargs):
        if names_split_param:
            # Resolve positional and keyword arguments to parameter names so
            # the flag is replaced in place instead of being passed twice.
            bound = func_signature.bind(*args, **kwargs)
            if bound.arguments.get(split_param, True):
                bound.arguments[split_param] = False
                print("🎯 Auto-disabled cell splitting to preserve multi-line text")
            args, kwargs = bound.args, bound.kwargs
        elif kwargs.get(split_param, True):
            kwargs[split_param] = False
            print("🎯 Auto-disabled cell splitting to preserve multi-line text")

        # Call the original function.
        result = func(*args, **kwargs)

        # Post-process the HTML. Item access is tried first (dict-like
        # results); attribute access is the fallback for plain objects.
        try:
            html = result['pred_html']
        except (TypeError, KeyError, IndexError):
            if hasattr(result, 'pred_html'):
                result.pred_html = post_process_html(result.pred_html)
        else:
            result['pred_html'] = post_process_html(html)
        return result

    return wrapper


def post_process_html(html: str) -> str:
    """Hook for HTML post-processing; currently returns ``html`` unchanged."""
    # Implement your HTML post-processing logic here.
    return html
def apply_decorator_adapter():
    """Wrap ``get_table_recognition_res`` with ``table_recognition_enhancer``.

    The decorated function replaces the original in the post-processing
    module and, when that module exposes the name, in ``pipeline_v2`` too:
    a ``from ... import`` there creates a separate binding that the first
    replacement does not reach.

    Returns:
        True on success, False when patching failed (the error is printed).
    """
    try:
        module = importlib.import_module(
            "paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2"
        )
        # Save and replace.
        original_func = module.get_table_recognition_res
        enhanced_func = table_recognition_enhancer(original_func)
        module.get_table_recognition_res = enhanced_func

        # Best effort: also rebind the pipeline module's own reference.
        try:
            pipeline_module = importlib.import_module(
                "paddlex.inference.pipelines.table_recognition.pipeline_v2"
            )
        except ImportError:
            pipeline_module = None
        if pipeline_module is not None and hasattr(pipeline_module, "get_table_recognition_res"):
            pipeline_module.get_table_recognition_res = enhanced_func

        return True
    except Exception as e:
        print(f"Failed to apply decorator adapter: {e}")
        return False
推荐使用方案1(猴子补丁适配器),因为它不需要修改 PaddleX 源码,可以在运行时启用,并且能通过 restore_original_function() 恢复原始实现。
使用时只需:
# At the top of your script
from adapters.table_recognition_adapter import apply_table_recognition_adapter
# Apply the adapter
apply_table_recognition_adapter()
# Use the pipeline as usual
pipeline = create_pipeline("table_recognition_v2")
results = pipeline.predict(image_path)
这样既保持了原代码的完整性,又能实现你的个性化需求。
你的判断完全正确:
from .table_recognition_post_processing_v2 import get_table_recognition_res
这种直接 import 到局部作用域的写法,会导致你在 module 层做 monkey patch(即importlib.import_module(...).get_table_recognition_res = ...)时,pipeline_v2.py 内部的 get_table_recognition_res 变量不会被替换,因为它已经在 import 时被绑定为本地函数对象。
你可以在 monkey patch 时同时 patch paddlex.inference.pipelines.table_recognition.pipeline_v2 的 get_table_recognition_res:
import paddlex.inference.pipelines.table_recognition.pipeline_v2 as pipeline_v2
from zhch.adapters.table_recognition_adapter import enhanced_get_table_recognition_res
pipeline_v2.get_table_recognition_res = enhanced_get_table_recognition_res
这样,_TableRecognitionPipelineV2 里的所有 get_table_recognition_res(...) 调用都会用你的增强实现。
如果你可以修改源码,建议把
from .table_recognition_post_processing_v2 import get_table_recognition_res
改为
import paddlex.inference.pipelines.table_recognition.table_recognition_post_processing_v2 as postproc_v2
然后用
postproc_v2.get_table_recognition_res(...)
这样你只需要 monkey patch 一处即可,所有引用都能生效。
推荐在适配器里加上下面两行:
import paddlex.inference.pipelines.table_recognition.pipeline_v2 as pipeline_v2
pipeline_v2.get_table_recognition_res = enhanced_get_table_recognition_res
这样你的适配器就能100%生效,无需改动原有 PaddleX 代码。