|
|
@@ -35,8 +35,12 @@ from ppstructurev3_utils import (
|
|
|
)
|
|
|
|
|
|
# 🎯 新增:导入适配器
|
|
|
-from adapters import apply_table_recognition_adapter, restore_original_function
|
|
|
-
|
|
|
+from adapters import (
|
|
|
+ apply_table_recognition_adapter,
|
|
|
+ restore_original_function,
|
|
|
+ apply_enhanced_doc_preprocessor,
|
|
|
+ restore_paddlex_doc_preprocessor
|
|
|
+)
|
|
|
|
|
|
def process_images_unified(image_paths: List[str],
|
|
|
pipeline_name: str = "PP-StructureV3",
|
|
|
@@ -55,9 +59,9 @@ def process_images_unified(image_paths: List[str],
|
|
|
# 🎯 应用适配器
|
|
|
adapter_applied = False
|
|
|
if use_enhanced_adapter:
|
|
|
- adapter_applied = apply_table_recognition_adapter()
|
|
|
+ adapter_applied = apply_table_recognition_adapter() and apply_enhanced_doc_preprocessor()
|
|
|
if adapter_applied:
|
|
|
- print("🎯 Enhanced table recognition adapter activated")
|
|
|
+ print("🎯 Enhanced table recognition adapter activated and document preprocessor applied")
|
|
|
else:
|
|
|
print("⚠️ Failed to apply adapter, using original implementation")
|
|
|
|
|
|
@@ -76,6 +80,7 @@ def process_images_unified(image_paths: List[str],
|
|
|
traceback.print_exc()
|
|
|
if adapter_applied:
|
|
|
restore_original_function()
|
|
|
+ restore_paddlex_doc_preprocessor()
|
|
|
return []
|
|
|
|
|
|
try:
|
|
|
@@ -111,7 +116,7 @@ def process_images_unified(image_paths: List[str],
|
|
|
# PP-StructureV3 使用下划线命名
|
|
|
predict_kwargs = {
|
|
|
'input': img_path,
|
|
|
- 'use_doc_orientation_classify': kwargs.get('use_doc_orientation', False), # 流水分析场景关闭方向分类
|
|
|
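+ 'use_doc_orientation_classify': kwargs.get('use_doc_orientation', True), # orientation classification is on by default; pass use_doc_orientation=False to disable it (e.g. for the 流水分析 scenario, where it was previously off)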
|
|
|
'use_doc_unwarping': kwargs.get('use_doc_unwarping', False),
|
|
|
'use_layout_detection': kwargs.get('use_layout_detection', True),
|
|
|
'use_seal_recognition': kwargs.get('use_seal_recognition', True),
|
|
|
@@ -221,6 +226,7 @@ def process_images_unified(image_paths: List[str],
|
|
|
# 🎯 清理:恢复原始函数
|
|
|
if adapter_applied:
|
|
|
restore_original_function()
|
|
|
+ restore_paddlex_doc_preprocessor()
|
|
|
print("🔄 Original function restored")
|
|
|
|
|
|
|
|
|
@@ -367,14 +373,18 @@ if __name__ == "__main__":
|
|
|
# 默认配置
|
|
|
default_config = {
|
|
|
# "input_file": "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/2023年度报告母公司.pdf",
|
|
|
- "input_file": "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results/2023年度报告母公司/2023年度报告母公司_page_027.png",
|
|
|
- "output_dir": "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results",
|
|
|
- "collect_results": f"/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results/processed_files_{time.strftime('%Y%m%d_%H%M%S')}.csv",
|
|
|
+ # "input_file": "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results/2023年度报告母公司/2023年度报告母公司_page_003.png",
|
|
|
+ # "output_dir": "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results",
|
|
|
+ # "collect_results": f"/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PPStructureV3_Results/processed_files_{time.strftime('%Y%m%d_%H%M%S')}.csv",
|
|
|
+ "input_file": "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/2023年度报告母公司.pdf",
|
|
|
+ # "input_file": "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/2023年度报告母公司/2023年度报告母公司_page_003.png",
|
|
|
+ "output_dir": "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results",
|
|
|
+ "collect_results": f"/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/processed_files_{time.strftime('%Y%m%d_%H%M%S')}.csv",
|
|
|
# "input_dir": "../../OmniDocBench/OpenDataLab___OmniDocBench/images",
|
|
|
# "output_dir": "./OmniDocBench_PPStructureV3_Results",
|
|
|
# "collect_results": f"./OmniDocBench_PPStructureV3_Results/processed_files_{time.strftime('%Y%m%d_%H%M%S')}.csv",
|
|
|
- "pipeline": "./my_config/PP-StructureV3.yaml",
|
|
|
- "device": "gpu",
|
|
|
+ "pipeline": "./my_config/PP-StructureV3-zhch.yaml",
|
|
|
+ "device": "cpu",
|
|
|
}
|
|
|
|
|
|
# 构造参数
|