|
@@ -0,0 +1,85 @@
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# PDF batch processor configuration file
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
+# Processor definitions
|
|
|
|
|
+processors:
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ # PaddleOCR-VL processor
|
|
|
|
|
+ # OCR processing with a vision-language model
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ paddleocr_vl_single_process:
|
|
|
|
|
+ script: "paddleocr_vl_single_process.py"
|
|
|
|
|
+ input_arg: "--input_file"  # presumably the CLI flag that carries the input file path; confirm against the runner
|
|
|
|
|
+ output_arg: "--output_dir"  # presumably the CLI flag that carries the output directory; confirm against the runner
|
|
|
|
|
+ extra_args:
|
|
|
|
|
+ - "--pipeline=./my_config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml"  # relative path; NOTE(review): confirm which working directory it resolves against
|
|
|
|
|
+ - "--no-adapter"
|
|
|
|
|
+ output_subdir: "paddleocr_vl_results"
|
|
|
|
|
+ description: "PaddleOCR-VL 处理器 - 视觉语言模型OCR"
|
|
|
|
|
+
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ # PP-StructureV3 local processor
|
|
|
|
|
+ # Document structure analysis (local GPU/CPU processing)
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ ppstructurev3_single_process:
|
|
|
|
|
+ script: "ppstructurev3_single_process.py"
|
|
|
|
|
+ input_arg: "--input_file"  # presumably the CLI flag that carries the input file path; confirm against the runner
|
|
|
|
|
+ output_arg: "--output_dir"  # presumably the CLI flag that carries the output directory; confirm against the runner
|
|
|
|
|
+ extra_args:  # no --device flag is passed here; device choice is presumably left to the script's default
|
|
|
|
|
+ - "--pipeline=./my_config/PP-StructureV3.yaml"
|
|
|
|
|
+ output_subdir: "ppstructurev3_results"
|
|
|
|
|
+ description: "PP-StructureV3 处理器 - 本地处理"
|
|
|
|
|
+
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ # PP-StructureV3 GPU processor
|
|
|
|
|
+ # Explicitly requests GPU acceleration (passes --device=gpu)
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ ppstructurev3_gpu:
|
|
|
|
|
+ script: "ppstructurev3_single_process.py"
|
|
|
|
|
+ input_arg: "--input_file"  # presumably the CLI flag that carries the input file path; confirm against the runner
|
|
|
|
|
+ output_arg: "--output_dir"  # presumably the CLI flag that carries the output directory; confirm against the runner
|
|
|
|
|
+ extra_args:
|
|
|
|
|
+ - "--pipeline=./my_config/PP-StructureV3.yaml"
|
|
|
|
|
+ - "--device=gpu"
|
|
|
|
|
+ output_subdir: "ppstructurev3_gpu_results"
|
|
|
|
|
+ description: "PP-StructureV3 处理器 - GPU加速"
|
|
|
|
|
+
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ # PP-StructureV3 CPU processor
|
|
|
|
|
+ # Explicitly runs on CPU (passes --device=cpu)
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ ppstructurev3_cpu:
|
|
|
|
|
+ script: "ppstructurev3_single_process.py"
|
|
|
|
|
+ input_arg: "--input_file"  # presumably the CLI flag that carries the input file path; confirm against the runner
|
|
|
|
|
+ output_arg: "--output_dir"  # presumably the CLI flag that carries the output directory; confirm against the runner
|
|
|
|
|
+ extra_args:
|
|
|
|
|
+ - "--pipeline=./my_config/PP-StructureV3.yaml"
|
|
|
|
|
+ - "--device=cpu"
|
|
|
|
|
+ output_subdir: "ppstructurev3_cpu_results"
|
|
|
|
|
+ description: "PP-StructureV3 处理器 - CPU处理"
|
|
|
|
|
+
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ # PP-StructureV3 API client (default). NOTE(review): nothing in this file marks a default processor; confirm where the default is chosen
|
|
|
|
|
+ # Calls a remote service over an HTTP API
|
|
|
|
|
+ # -------------------------------------------------------------------------
|
|
|
|
|
+ ppstructurev3_single_client:
|
|
|
|
|
+ script: "ppstructurev3_single_client.py"
|
|
|
|
|
+ input_arg: "--input_file"  # presumably the CLI flag that carries the input file path; confirm against the runner
|
|
|
|
|
+ output_arg: "--output_dir"  # presumably the CLI flag that carries the output directory; confirm against the runner
|
|
|
|
|
+ extra_args:
|
|
|
|
|
+ - "--api_url=http://10.192.72.11:8111/layout-parsing"  # NOTE(review): hard-coded private IP over plain HTTP; confirm this endpoint is meant to be committed
|
|
|
|
|
+ - "--timeout=300"  # units are not shown here; presumably seconds, confirm against the client script
|
|
|
|
|
+ output_subdir: "ppstructurev3_client_results"
|
|
|
|
|
+ description: "PP-StructureV3 HTTP API 客户端 - 远程服务"
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# Global configuration
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+global:
|
|
|
|
|
+ # Base directory for PDF files
|
|
|
|
|
+ base_dir: "/Users/zhch158/workspace/data/流水分析"  # NOTE(review): absolute, user-specific path; confirm it can be overridden per machine
|
|
|
|
|
+
|
|
|
|
|
+ # Default output subdirectory name (used if a processor does not specify one)
|
|
|
|
|
+ output_subdir: "results"
|
|
|
|
|
+
|