---
# Per-document configuration (file: 山西云集科技有限公司.yaml).
# Declares the document name, its base directory, and the list of OCR result
# sets available for it.
#
# NOTE(review): this file was recovered from a flattened copy, so the nesting
# (ocr_results placed under `document`) was reconstructed — confirm against
# the consumer's schema.
document:
  name: "山西云集科技有限公司"
  base_dir: "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司"

  # Key point: declare the OCR tools used for this document and the directory
  # holding each tool's results.
  # Each entry has: tool, result_dir, image_dir, description, enabled.
  # Presumably result_dir / image_dir are relative to base_dir and {{name}} is
  # substituted with the document name by the consumer — TODO confirm.
  ocr_results:
    # bank_statement_yusys_v4
    - tool: "mineru"
      result_dir: "bank_statement_yusys_v4"
      image_dir: "bank_statement_yusys_v4/{{name}}"
      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
      enabled: true

    # bank_statement_yusys_local
    - tool: "mineru"
      result_dir: "bank_statement_yusys_local"
      image_dir: "bank_statement_yusys_local/{{name}}"
      description: "YUSYS-OCR框架(local) GLM-OCR"
      enabled: true

    # bank_statement_yusys_v3
    - tool: "mineru"
      result_dir: "bank_statement_yusys_v3"
      image_dir: "bank_statement_yusys_v3/{{name}}"
      description: "YUSYS-OCR框架 v3.0"
      enabled: true

    # bank_statement_yusys_v2
    - tool: "mineru"
      result_dir: "bank_statement_yusys_v2"
      image_dir: "bank_statement_yusys_v2/{{name}}"
      description: "YUSYS统一OCR框架"
      enabled: true

    # MinerU
    - tool: "mineru"
      result_dir: "mineru_vllm_results"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU 图片合成结果"
      enabled: true

    # MinerU (with cell bbox)
    # The image_dir points at the plain MinerU result set, not the
    # *_cell_bbox directory.
    - tool: "mineru"
      result_dir: "mineru_vllm_results_cell_bbox"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU + PaddleOCR 坐标"
      enabled: true

    # PaddleOCR-VL
    - tool: "paddleocr_vl"
      result_dir: "paddleocr_vl_results"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM 图片合成结果"
      enabled: true

    # PaddleOCR-VL (with cell bbox)
    # The image_dir points at the plain PaddleOCR-VL result set.
    - tool: "mineru"  # same format as MinerU
      result_dir: "paddleocr_vl_results_cell_bbox"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM + PaddleOCR 坐标"
      enabled: true

    # DotsOCR
    - tool: "dots_ocr"
      result_dir: "dotsocr_vllm_results"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR 图片合成结果"
      enabled: true

    # DotsOCR (with cell bbox)
    # The image_dir points at the plain DotsOCR result set.
    # NOTE(review): tool is "mineru" here, presumably for the same
    # format reason as the PaddleOCR-VL cell-bbox entry — confirm.
    - tool: "mineru"
      result_dir: "dotsocr_vllm_results_cell_bbox"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR + PaddleOCR 坐标"
      enabled: true

    # PPStructV3
    - tool: "ppstructv3"
      result_dir: "ppstructurev3_client_results"
      image_dir: "ppstructurev3_client_results/{{name}}"
      description: "PPStructV3 图片合成结果"
      enabled: true