德_内蒙古银行照.yaml 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # 文档: 德_内蒙古银行照
  2. document:
  3. name: "德_内蒙古银行照"
  4. base_dir: "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照"
  5. # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
  6. ocr_results:
  7. # PPStructV3
  8. - tool: "ppstructv3"
  9. result_dir: "ppstructurev3_client_results"
  10. image_dir: "ppstructurev3_client_results/{{name}}"
  11. description: "PPStructV3 图片合成结果"
  12. enabled: true
  13. # PaddleOCR-VL
  14. - tool: "paddleocr_vl"
  15. result_dir: "paddleocr_vl_results"
  16. image_dir: "paddleocr_vl_results/{{name}}"
  17. description: "PaddleOCR VLM 图片合成结果"
  18. enabled: true
  19. # PaddleOCR-VL (带 cell bbox)
  20. - tool: "mineru" # 格式同 MinerU
  21. result_dir: "paddleocr_vl_results_cell_bbox"
  22. image_dir: "paddleocr_vl_results/{{name}}"
  23. description: "PaddleOCR VLM + PaddleOCR 坐标"
  24. enabled: true
  25. # MinerU
  26. - tool: "mineru"
  27. result_dir: "mineru_vllm_results"
  28. image_dir: "mineru_vllm_results/{{name}}"
  29. description: "MinerU 图片合成结果"
  30. enabled: true
  31. # MinerU (带 cell bbox)
  32. - tool: "mineru"
  33. result_dir: "mineru_vllm_results_cell_bbox"
  34. image_dir: "mineru_vllm_results/{{name}}"
  35. description: "MinerU + PaddleOCR 坐标"
  36. enabled: true
  37. # DotsOCR
  38. - tool: "dots_ocr"
  39. result_dir: "dotsocr_vllm_results"
  40. image_dir: "dotsocr_vllm_results/{{name}}"
  41. description: "Dots OCR 图片合成结果"
  42. enabled: true