---
# Per-document configuration: identifies one source document and lists the
# OCR tool outputs available for it.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file (diff markers and line-number residue removed). Confirm that
# `ocr_results` belongs under `document` against the consumer's schema.
document:
  name: "2023年度报告母公司"
  base_dir: "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司"

  # Key improvement: declare the OCR tools used for this document and the
  # directory that holds each tool's results.
  # Each entry carries: tool, result_dir, image_dir, description, enabled.
  # NOTE(review): `{{name}}` is presumably expanded to the document name and
  # the relative directories resolved against `base_dir` — confirm in loader.
  ocr_results:
    # PPStructV3
    - tool: "ppstructv3"
      result_dir: "ppstructurev3_client_results"
      image_dir: "ppstructurev3_client_results/{{name}}"
      description: "PPStructV3 图片合成结果"
      enabled: true

    # PaddleOCR-VL
    - tool: "paddleocr_vl"
      result_dir: "paddleocr_vl_results"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM 图片合成结果"
      enabled: true

    # PaddleOCR-VL (with cell bbox); images come from the plain
    # PaddleOCR-VL result directory.
    - tool: "mineru"  # same output format as MinerU
      result_dir: "paddleocr_vl_results_cell_bbox"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM + PaddleOCR 坐标"
      enabled: true

    # MinerU
    - tool: "mineru"
      result_dir: "mineru_vllm_results"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU 图片合成结果"
      enabled: true

    # MinerU (with cell bbox); images come from the plain MinerU result
    # directory.
    - tool: "mineru"
      result_dir: "mineru_vllm_results_cell_bbox"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU + PaddleOCR 坐标"
      enabled: true

    # DotsOCR
    - tool: "dots_ocr"
      result_dir: "dotsocr_vllm_results"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR 图片合成结果"
      enabled: true

    # DotsOCR (with cell bbox); declared with tool "mineru" like the other
    # cell-bbox variants, images come from the plain DotsOCR result directory.
    - tool: "mineru"
      result_dir: "dotsocr_vllm_results_cell_bbox"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR + PaddleOCR 坐标"
      enabled: true