---
# Per-document configuration: names one document, its base directory, and
# the OCR result sets available for it.
document:
  name: "至远彩色_2023年报"
  base_dir: "/Users/zhch158/workspace/data/流水分析/至远彩色_2023年报"

  # Key improvement: declare the OCR tools used for this document and the
  # directories that hold their results.
  # NOTE(review): result_dir / image_dir are relative paths, presumably
  # resolved against base_dir, and "{{name}}" is presumably replaced with the
  # document name by the consumer -- confirm against the loader.
  ocr_results:
    # PPStructV3
    - tool: "ppstructv3"
      result_dir: "ppstructurev3_client_results"
      image_dir: "ppstructurev3_client_results/{{name}}"
      description: "PPStructV3 图片合成结果"
      enabled: true

    # PaddleOCR-VL
    - tool: "paddleocr_vl"
      result_dir: "paddleocr_vl_results"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM 图片合成结果"
      enabled: true

    # PaddleOCR-VL (with cell bbox)
    # The tool is set to "mineru" because the output format is the same as
    # MinerU's; images are taken from the plain PaddleOCR-VL results directory.
    - tool: "mineru"
      result_dir: "paddleocr_vl_results_cell_bbox"
      image_dir: "paddleocr_vl_results/{{name}}"
      description: "PaddleOCR VLM + PaddleOCR 坐标"
      enabled: true

    # MinerU
    - tool: "mineru"
      result_dir: "mineru_vllm_results"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU 图片合成结果"
      enabled: true

    # MinerU (with cell bbox)
    # Images are taken from the plain MinerU results directory.
    - tool: "mineru"
      result_dir: "mineru_vllm_results_cell_bbox"
      image_dir: "mineru_vllm_results/{{name}}"
      description: "MinerU + PaddleOCR 坐标"
      enabled: true

    # DotsOCR
    - tool: "dots_ocr"
      result_dir: "dotsocr_vllm_results"
      image_dir: "dotsocr_vllm_results/{{name}}"
      description: "Dots OCR 图片合成结果"
      enabled: true