---
# ============================================================================
# PDF batch processor configuration file
# ============================================================================
# NOTE(review): field meanings below are inferred from the key names
# (script / input_arg / output_arg / extra_args / output_subdir / description);
# confirm against the batch runner that loads this file.

# Processor definitions
processors:
  # -------------------------------------------------------------------------
  # PaddleOCR-VL processor
  # OCR processing with a vision-language model
  # -------------------------------------------------------------------------
  paddleocr_vl_single_process:
    script: "paddleocr_vl_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml"
      - "--no-adapter"
    output_subdir: "paddleocr_vl_results"
    description: "PaddleOCR-VL 处理器 - 视觉语言模型OCR"

  # -------------------------------------------------------------------------
  # PP-StructureV3 local processor
  # Document structure analysis (local GPU/CPU processing)
  # -------------------------------------------------------------------------
  ppstructurev3_single_process:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
    output_subdir: "ppstructurev3_results"
    description: "PP-StructureV3 处理器 - 本地处理"

  # -------------------------------------------------------------------------
  # PP-StructureV3 GPU processor
  # Explicitly uses GPU acceleration
  # -------------------------------------------------------------------------
  ppstructurev3_gpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=gpu"
    output_subdir: "ppstructurev3_gpu_results"
    description: "PP-StructureV3 处理器 - GPU加速"

  # -------------------------------------------------------------------------
  # PP-StructureV3 CPU processor
  # Explicitly uses CPU processing
  # -------------------------------------------------------------------------
  ppstructurev3_cpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=cpu"
    output_subdir: "ppstructurev3_cpu_results"
    description: "PP-StructureV3 处理器 - CPU处理"

  # -------------------------------------------------------------------------
  # PP-StructureV3 API client (default)
  # Calls a remote service over an HTTP API
  # -------------------------------------------------------------------------
  ppstructurev3_single_client:
    script: "ppstructurev3_single_client.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      # NOTE(review): hard-coded private-network address; confirm it is
      # reachable from every environment that uses this config.
      - "--api_url=http://10.192.72.11:8111/layout-parsing"
      # NOTE(review): timeout unit is presumably seconds -- confirm in the
      # client script.
      - "--timeout=300"
    output_subdir: "ppstructurev3_client_results"
    description: "PP-StructureV3 HTTP API 客户端 - 远程服务"

# ============================================================================
# Global configuration
# ============================================================================
global:
  # Base directory for PDF files
  # NOTE(review): user-specific absolute path; adjust per machine.
  base_dir: "/Users/zhch158/workspace/data/流水分析"

  # Default output subdirectory name (used if the processor does not specify one)
  output_subdir: "results"