# File: 康强_北京农村商业银行.yaml (2.0 KB)

  # Document: 康强_北京农村商业银行
document:
  name: "康强_北京农村商业银行"
  base_dir: "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行"
  # Key improvement: declare the OCR tools used for this document and the
  # result directory of each tool.
  # NOTE(review): the original indentation was lost; nesting ocr_results under
  # document is a reconstruction — confirm against the consumer's schema.
  # NOTE(review): "{{name}}" in image_dir presumably expands to document.name;
  # result_dir / image_dir are presumably relative to base_dir — confirm.
  ocr_results:
    # bank_statement_yusys_v2
    # NOTE(review): tool is "mineru" and image_dir points at the MinerU images;
    # presumably the YUSYS output uses the MinerU format — confirm.
    - tool: "mineru"
      result_dir: "bank_statement_yusys_v2"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "YUSYS统一OCR框架"
      enabled: true
    # MinerU
    - tool: "mineru"
      result_dir: "mineru_vllm_results"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU 图片合成结果"
      enabled: true
    # MinerU (with cell bbox)
    - tool: "mineru"
      result_dir: "mineru_vllm_results_cell_bbox"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU + PaddleOCR 坐标"
      enabled: true
    # PaddleOCR-VL
    - tool: "paddleocr_vl"
      result_dir: "paddleocr_vl_results"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM 图片合成结果"
      enabled: true
    # PaddleOCR-VL (with cell bbox)
    - tool: "mineru"  # same format as MinerU
      result_dir: "paddleocr_vl_results_cell_bbox"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM + PaddleOCR 坐标"
      enabled: true
    # DotsOCR
    - tool: "dots_ocr"
      result_dir: "dotsocr_vllm_results"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR 图片合成结果"
      enabled: true
    # DotsOCR (with cell bbox)
    # NOTE(review): tool is "mineru" here as in the PaddleOCR-VL cell-bbox
    # entry; presumably the same "MinerU format" reason applies — confirm.
    - tool: "mineru"
      result_dir: "dotsocr_vllm_results_cell_bbox"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR + PaddleOCR 坐标"
      enabled: true
    # PPStructV3
    - tool: "ppstructv3"
      result_dir: "ppstructurev3_client_results"
      image_dir: "ppstructurev3_client_results/{{name}}"
      description: "PPStructV3 图片合成结果"
      enabled: true