# ============================================================================
# PDF batch processor configuration
#
# Top-level keys:
#   processors - named processor entries; each one lists a script, the flag
#                names used for the input file and output directory, extra
#                command-line arguments, an output subdirectory and a
#                human-readable description.
#   global     - settings shared by all processors.
#
# NOTE(review): the program that loads this file is not visible here, so how
# each key is consumed should be confirmed against that loader.
# ============================================================================
---

# Processor definitions
processors:
  # --------------------------------------------------------------------------
  # PaddleOCR-VL processor
  # OCR processing with a vision-language model.
  # --------------------------------------------------------------------------
  paddleocr_vl_single_process:
    script: "paddleocr_vl_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml"
      - "--no-adapter"
    output_subdir: "paddleocr_vl_results"
    description: "PaddleOCR-VL 处理器 - 视觉语言模型OCR"

  # --------------------------------------------------------------------------
  # PP-StructureV3 local processor
  # Document structure analysis (local GPU/CPU processing).
  # --------------------------------------------------------------------------
  ppstructurev3_single_process:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
    output_subdir: "ppstructurev3_results"
    description: "PP-StructureV3 处理器 - 本地处理"

  # --------------------------------------------------------------------------
  # PP-StructureV3 GPU processor
  # Same script as the local processor, with the device explicitly set to GPU.
  # --------------------------------------------------------------------------
  ppstructurev3_gpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=gpu"
    output_subdir: "ppstructurev3_gpu_results"
    description: "PP-StructureV3 处理器 - GPU加速"

  # --------------------------------------------------------------------------
  # PP-StructureV3 CPU processor
  # Same script as the local processor, with the device explicitly set to CPU.
  # --------------------------------------------------------------------------
  ppstructurev3_cpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=cpu"
    output_subdir: "ppstructurev3_cpu_results"
    description: "PP-StructureV3 处理器 - CPU处理"

  # --------------------------------------------------------------------------
  # PP-StructureV3 API client (default)
  # Calls a remote service through its HTTP API.
  # NOTE(review): nothing in this file marks this entry as the default;
  # presumably the selection lives in the consuming program - confirm.
  # --------------------------------------------------------------------------
  ppstructurev3_single_client:
    script: "ppstructurev3_single_client.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--api_url=http://10.192.72.11:8111/layout-parsing"
      - "--timeout=300"
    output_subdir: "ppstructurev3_client_results"
    description: "PP-StructureV3 HTTP API 客户端 - 远程服务"

# ============================================================================
# Global configuration
# ============================================================================
global:
  # Base directory for the PDF files.
  base_dir: "/Users/zhch158/workspace/data/流水分析"

  # Default output subdirectory name (used if a processor does not specify
  # one).
  output_subdir: "results"