# Source file: 康强_北京农村商业银行.yaml (2.4 KB)

  1. # 文档: 康强_北京农村商业银行
  2. document:
  3. name: "康强_北京农村商业银行"
  4. base_dir: "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行"
  5. # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
  6. ocr_results:
  7. # bank_statement_yusys_v4
  8. - tool: "mineru"
  9. result_dir: "bank_statement_yusys_v4"
  10. image_dir: "bank_statement_yusys_v4/{{name}}"
  11. description: "YUSYS-OCR框架 v4.0 GLM-OCR"
  12. enabled: true
  13. # bank_statement_yusys_v3
  14. - tool: "mineru"
  15. result_dir: "bank_statement_yusys_v3"
  16. image_dir: "bank_statement_yusys_v3/{{name}}"
  17. description: "YUSYS-OCR框架 v3.0"
  18. enabled: true
  19. # bank_statement_yusys_v2
  20. - tool: "mineru"
  21. result_dir: "bank_statement_yusys_v2"
  22. image_dir: "mineru_vllm_results/{{name}}"
  23. description: "YUSYS统一OCR框架"
  24. enabled: true
  25. # MinerU
  26. - tool: "mineru"
  27. result_dir: "mineru_vllm_results"
  28. image_dir: "mineru_vllm_results/{{name}}"
  29. description: "MinerU 图片合成结果"
  30. enabled: true
  31. # MinerU (带 cell bbox)
  32. - tool: "mineru"
  33. result_dir: "mineru_vllm_results_cell_bbox"
  34. image_dir: "mineru_vllm_results/{{name}}"
  35. description: "MinerU + PaddleOCR 坐标"
  36. enabled: true
  37. # PaddleOCR-VL
  38. - tool: "paddleocr_vl"
  39. result_dir: "paddleocr_vl_results"
  40. image_dir: "paddleocr_vl_results/{{name}}"
  41. description: "PaddleOCR VLM 图片合成结果"
  42. enabled: true
  43. # PaddleOCR-VL (带 cell bbox)
  44. - tool: "mineru" # 格式同 MinerU
  45. result_dir: "paddleocr_vl_results_cell_bbox"
  46. image_dir: "paddleocr_vl_results/{{name}}"
  47. description: "PaddleOCR VLM + PaddleOCR 坐标"
  48. enabled: true
  49. # DotsOCR
  50. - tool: "dots_ocr"
  51. result_dir: "dotsocr_vllm_results"
  52. image_dir: "dotsocr_vllm_results/{{name}}"
  53. description: "Dots OCR 图片合成结果"
  54. enabled: true
  55. # DotsOCR (带 cell bbox)
  56. - tool: "mineru"
  57. result_dir: "dotsocr_vllm_results_cell_bbox"
  58. image_dir: "dotsocr_vllm_results/{{name}}"
  59. description: "Dots OCR + PaddleOCR 坐标"
  60. enabled: true
  61. # PPStructV3
  62. - tool: "ppstructv3"
  63. result_dir: "ppstructurev3_client_results"
  64. image_dir: "ppstructurev3_client_results/{{name}}"
  65. description: "PPStructV3 图片合成结果"
  66. enabled: true