|
@@ -0,0 +1,55 @@
|
|
|
|
|
+# 文档: 康强_北京农村商业银行
|
|
|
|
|
+document:
|
|
|
|
|
+  name: "康强_北京农村商业银行"  # presumably what {{name}} in image_dir expands to — TODO confirm
|
|
|
|
|
+  base_dir: "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行"  # absolute, machine-specific path
|
|
|
|
|
+
|
|
|
|
|
+  # Key improvement: declares the OCR tools used for this document and their result directories
|
|
|
|
|
+  ocr_results:
|
|
|
|
|
+    # PPStructV3 output; image_dir is the {{name}} subfolder of result_dir
|
|
|
|
|
+    - tool: "ppstructv3"
|
|
|
|
|
+      result_dir: "ppstructurev3_client_results"
|
|
|
|
|
+      image_dir: "ppstructurev3_client_results/{{name}}"
|
|
|
|
|
+      description: "PPStructV3 图片合成结果"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # PaddleOCR-VL output; image_dir is the {{name}} subfolder of result_dir
|
|
|
|
|
+    - tool: "paddleocr_vl"
|
|
|
|
|
+      result_dir: "paddleocr_vl_results"
|
|
|
|
|
+      image_dir: "paddleocr_vl_results/{{name}}"
|
|
|
|
|
+      description: "PaddleOCR VLM 图片合成结果"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # PaddleOCR-VL (with cell bbox); image_dir is under paddleocr_vl_results, not the *_cell_bbox result_dir
|
|
|
|
|
+    - tool: "mineru" # same format as MinerU
|
|
|
|
|
+      result_dir: "paddleocr_vl_results_cell_bbox"
|
|
|
|
|
+      image_dir: "paddleocr_vl_results/{{name}}"
|
|
|
|
|
+      description: "PaddleOCR VLM + PaddleOCR 坐标"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # MinerU output; image_dir is the {{name}} subfolder of result_dir
|
|
|
|
|
+    - tool: "mineru"
|
|
|
|
|
+      result_dir: "mineru_vllm_results"
|
|
|
|
|
+      image_dir: "mineru_vllm_results/{{name}}"
|
|
|
|
|
+      description: "MinerU 图片合成结果"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # MinerU (with cell bbox); image_dir is under mineru_vllm_results, not the *_cell_bbox result_dir
|
|
|
|
|
+    - tool: "mineru"
|
|
|
|
|
+      result_dir: "mineru_vllm_results_cell_bbox"
|
|
|
|
|
+      image_dir: "mineru_vllm_results/{{name}}"
|
|
|
|
|
+      description: "MinerU + PaddleOCR 坐标"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # DotsOCR output; image_dir is the {{name}} subfolder of result_dir
|
|
|
|
|
+    - tool: "dots_ocr"
|
|
|
|
|
+      result_dir: "dotsocr_vllm_results"
|
|
|
|
|
+      image_dir: "dotsocr_vllm_results/{{name}}"
|
|
|
|
|
+      description: "Dots OCR 图片合成结果"
|
|
|
|
|
+      enabled: true
|
|
|
|
|
+
|
|
|
|
|
+    # DotsOCR (with cell bbox); tool is "mineru", not "dots_ocr" — presumably MinerU-format results, TODO confirm
|
|
|
|
|
+    - tool: "mineru"
|
|
|
|
|
+      result_dir: "dotsocr_vllm_results_cell_bbox"
|
|
|
|
|
+      image_dir: "dotsocr_vllm_results/{{name}}"  # under dotsocr_vllm_results, not the *_cell_bbox result_dir
|
|
|
|
|
+      description: "Dots OCR + PaddleOCR 坐标"
|
|
|
|
|
+      enabled: true
|