# File: 杨万益_福建农信.yaml
#
# Per-document configuration: identifies one bank-statement document, its data
# directory, and the OCR result sets registered for it.
# Document: 杨万益_福建农信
document:
  name: "杨万益_福建农信"
  base_dir: "/Users/zhch158/workspace/data/流水分析/杨万益_福建农信"
  # Key improvement: declare the OCR tools used for this document and the
  # directory holding each tool's results.
  # NOTE(review): result_dir / image_dir look like paths relative to base_dir,
  # and "{{name}}" looks like a placeholder for document.name - confirm against
  # the loader.
  # To re-enable a commented-out entry below, strip the leading "# " from each
  # of its lines; the indentation is already correct for a list item.
  ocr_results:
    # bank_statement_yusys_v4
    # - tool: "mineru"
    #   result_dir: "bank_statement_yusys_v4"
    #   image_dir: "bank_statement_yusys_v4/{{name}}"
    #   description: "YUSYS-OCR框架 v4.0 GLM-OCR"
    #   enabled: true
    # # bank_statement_yusys_v3
    # - tool: "mineru"
    #   result_dir: "bank_statement_yusys_v3"
    #   image_dir: "bank_statement_yusys_v3/{{name}}"
    #   description: "YUSYS-OCR框架 v3.0"
    #   enabled: true
    # bank_statement_yusys_local (GLM-OCR)
    - tool: "mineru"
      result_dir: "bank_statement_yusys_local"
      image_dir: "bank_statement_yusys_local/{{name}}"
      description: "YUSYS-OCR框架(local) GLM-OCR"
      enabled: true
    # bank_statement_paddle_vl_local (PaddleOCR-VL)
    - tool: "mineru"
      result_dir: "bank_statement_paddle_vl_local"
      image_dir: "bank_statement_paddle_vl_local/{{name}}"
      description: "YUSYS-OCR框架(local) PaddleOCR-VL"
      enabled: true
    # # MinerU
    # - tool: "mineru"
    #   result_dir: "mineru_vllm_results"
    #   image_dir: "mineru_vllm_results/{{name}}"
    #   description: "MinerU 图片合成结果"
    #   enabled: true
    # # MinerU (with cell bbox)
    # - tool: "mineru"
    #   result_dir: "mineru_vllm_results_cell_bbox"
    #   image_dir: "mineru_vllm_results/{{name}}"
    #   description: "MinerU + PaddleOCR 坐标"
    #   enabled: true
    # # PaddleOCR-VL
    # - tool: "paddleocr_vl"
    #   result_dir: "paddleocr_vl_results"
    #   image_dir: "paddleocr_vl_results/{{name}}"
    #   description: "PaddleOCR VLM 图片合成结果"
    #   enabled: true
    # # PaddleOCR-VL (with cell bbox)
    # - tool: "mineru"  # same format as MinerU
    #   result_dir: "paddleocr_vl_results_cell_bbox"
    #   image_dir: "paddleocr_vl_results/{{name}}"
    #   description: "PaddleOCR VLM + PaddleOCR 坐标"
    #   enabled: true
    # # DotsOCR
    # - tool: "dots_ocr"
    #   result_dir: "dotsocr_vllm_results"
    #   image_dir: "dotsocr_vllm_results/{{name}}"
    #   description: "Dots OCR 图片合成结果"
    #   enabled: true
    # # DotsOCR (with cell bbox)
    # - tool: "mineru"
    #   result_dir: "dotsocr_vllm_results_cell_bbox"
    #   image_dir: "dotsocr_vllm_results/{{name}}"
    #   description: "Dots OCR + PaddleOCR 坐标"
    #   enabled: true
    # # PPStructV3
    # - tool: "ppstructv3"
    #   result_dir: "ppstructurev3_client_results"
    #   image_dir: "ppstructurev3_client_results/{{name}}"
    #   description: "PPStructV3 图片合成结果"
    #   enabled: true