---
# ============================================================================
# processor_configs.yaml
# PDF batch processor configuration file
# ============================================================================

# Processor definitions.
# Each entry under `processors` names one processor and gives the script to
# run, the argument names used for the input file and the output directory,
# any extra command-line arguments, the output subdirectory, and a
# human-readable description.
processors:
  # --------------------------------------------------------------------------
  # PaddleOCR-VL processor
  # OCR processing with a vision-language model
  # --------------------------------------------------------------------------
  paddleocr_vl_single_process:
    script: "paddleocr_vl_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PaddleOCR-VL-Client-RT-DETR-H_layout_17cls.yaml"
      - "--no-adapter"
    output_subdir: "paddleocr_vl_results"
    description: "PaddleOCR-VL 处理器 - 视觉语言模型OCR"

  # --------------------------------------------------------------------------
  # PP-StructureV3 local processor
  # Document structure analysis (local GPU/CPU processing)
  # --------------------------------------------------------------------------
  ppstructurev3_single_process:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
    output_subdir: "ppstructurev3_results"
    description: "PP-StructureV3 处理器 - 本地处理"

  # --------------------------------------------------------------------------
  # PP-StructureV3 GPU processor
  # Explicitly uses GPU acceleration
  # --------------------------------------------------------------------------
  ppstructurev3_gpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=gpu"
    output_subdir: "ppstructurev3_gpu_results"
    description: "PP-StructureV3 处理器 - GPU加速"

  # --------------------------------------------------------------------------
  # PP-StructureV3 CPU processor
  # Explicitly uses CPU processing
  # --------------------------------------------------------------------------
  ppstructurev3_cpu:
    script: "ppstructurev3_single_process.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      - "--pipeline=./my_config/PP-StructureV3.yaml"
      - "--device=cpu"
    output_subdir: "ppstructurev3_cpu_results"
    description: "PP-StructureV3 处理器 - CPU处理"

  # --------------------------------------------------------------------------
  # PP-StructureV3 API client (default)
  # Calls a remote service over an HTTP API
  # --------------------------------------------------------------------------
  ppstructurev3_single_client:
    script: "ppstructurev3_single_client.py"
    input_arg: "--input_file"
    output_arg: "--output_dir"
    extra_args:
      # NOTE(review): hard-coded host/port; presumably an internal service
      # address - confirm it is reachable from every environment that runs
      # this configuration.
      - "--api_url=http://10.192.72.11:8111/layout-parsing"
      - "--timeout=300"
    output_subdir: "ppstructurev3_client_results"
    description: "PP-StructureV3 HTTP API 客户端 - 远程服务"

# ============================================================================
# Global configuration
# ============================================================================
global:
  # Base directory of the PDF files.
  # NOTE(review): machine-specific absolute path; adjust per environment.
  base_dir: "/Users/zhch158/workspace/data/流水分析"

  # Default output subdirectory name (used when a processor does not
  # specify one).
  output_subdir: "results"