---
# OCR validation tool configuration file.
#
# NOTE(review): this document was reconstructed from a copy in which every
# line had been collapsed onto a single line starting with `#` (i.e. one big
# comment, which loads as an empty document). All keys and values are
# unchanged; only line breaks and indentation were restored. Where the
# original nesting was ambiguous it is flagged below - confirm against the
# code that reads this file.

# Style configuration
styles:
  font_sizes:
    small: 10
    medium: 12
    large: 14
    extra_large: 16

  colors:
    primary: "#0288d1"
    secondary: "#ff9800"
    success: "#4caf50"
    error: "#f44336"
    warning: "#ff9800"  # same value as `secondary`
    background: "#fafafa"
    text: "#333333"

  layout:
    default_zoom: 1.0
    default_height: 600
    sidebar_width: 1
    content_width: 0.7

# UI configuration
ui:
  page_title: "OCR可视化校验工具"
  page_icon: "🔍"
  layout: "wide"
  sidebar_state: "expanded"

  # Default settings
  # NOTE(review): assumed to belong under `ui` - TODO confirm.
  # "small" matches a key under `styles.font_sizes`.
  default_font_size: "small"
  default_layout: "标准布局"

# File path configuration
# NOTE(review): both directories are absolute paths under one user's home
# directory; they will need adjusting on any other machine.
paths:
  ocr_out_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results"
  src_img_dir: "/Users/zhch158/workspace/data/至远彩色印刷工业有限公司/data_PPStructureV3_Results/2023年度报告母公司"
  supported_image_formats: [".png", ".jpg", ".jpeg"]

# OCR data configuration
ocr:
  min_text_length: 2
  default_confidence: 1.0
  exclude_texts: ["Picture", ""]

  # OCR tool type configuration
  # NOTE(review): `tools` and `auto_detection` are assumed to be nested under
  # `ocr` (rather than being top-level keys) - TODO confirm against the loader.
  tools:
    dots_ocr:
      name: "Dots OCR"
      description: "专业VLM OCR"
      json_structure: "array"  # the JSON output is an array
      text_field: "text"
      bbox_field: "bbox"
      category_field: "category"
      confidence_field: "confidence"

    ppstructv3:
      name: "PPStructV3"
      description: "PaddleOCR PP-StructureV3"
      json_structure: "object"  # the JSON output is an object
      parsing_results_field: "parsing_res_list"
      text_field: "block_content"
      bbox_field: "block_bbox"
      category_field: "block_label"
      confidence_field: "confidence"

  # Rules for auto-detecting the tool type
  auto_detection:
    enabled: true
    rules:
      - field_exists: "parsing_res_list"  # if this field exists, treat as ppstructv3
        tool_type: "ppstructv3"
      - json_is_array: true  # if the JSON is an array, treat as dots_ocr
        tool_type: "dots_ocr"