|
@@ -0,0 +1,38 @@
|
|
|
|
|
+"""PaddleOCR-VL批量处理程序(简化版)"""
|
|
|
|
|
+from ppstructurev3_single_process import main as unified_main
|
|
|
|
|
+import sys
|
|
|
|
|
+import os
|
|
|
|
|
+import time
|
|
|
|
|
+
|
|
|
|
|
# Thin launcher: all real work is delegated to the unified program's main().
if __name__ == "__main__":
    print("🚀 启动统一PDF/图像处理程序...")
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', 'Not set')
    print(f"🔧 CUDA_VISIBLE_DEVICES: {visible_devices}")

    launched_without_args = len(sys.argv) == 1
    if launched_without_args:
        # Nothing was passed on the command line: fall back to the built-in
        # default configuration below.
        print("ℹ️  No command line arguments provided. Running with default configuration...")

        # Default configuration, kept as ordered (option, value) pairs so the
        # generated command line has a stable option order.
        results_dir = "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照/PaddleOCR_VL_Results"
        run_stamp = time.strftime('%Y%m%d_%H%M%S')
        default_options = [
            ("input_file", "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照.pdf"),
            # Alternative single-image input:
            # ("input_file", "/home/ubuntu/zhch/data/至远彩色印刷工业有限公司/PaddleOCR_VL_Results/2023年度报告母公司/2023年度报告母公司_page_027.png"),
            ("output_dir", results_dir),
            ("collect_results", f"{results_dir}/processed_files_{run_stamp}.csv"),
            ("pipeline", "./my_config/PaddleOCR-VL-Client.yaml"),
        ]

        # Rebuild argv as: program name, then "--option value" for each default.
        rebuilt_argv = [sys.argv[0]]
        for option, setting in default_options:
            rebuilt_argv += [f"--{option}", str(setting)]

        # Run the PaddleOCR-VL single-process batch program without the adapter.
        rebuilt_argv.append("--no-adapter")

        # Optional: disable normalization.
        # rebuilt_argv.append("--no-normalize")

        # Optional: test mode.
        # rebuilt_argv.append("--test_mode")

        sys.argv = rebuilt_argv

    # Hand control to the unified entry point and use its return value as the
    # process exit status.
    exit_status = unified_main()
    sys.exit(exit_status)
|