#!/usr/bin/env python3
"""
Simplified OCR verification script - fetch the model's raw analysis text.

Sends one image plus a short summary of its OCR results to an
OpenAI-compatible multimodal chat endpoint and stores the free-form answer.
"""

import os
import base64
import json
import mimetypes
from pathlib import Path

from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (from a .env file, if one is present).
load_dotenv()

# Prompt sent to the model; ``{ocr_summary}`` is filled in per run.
_PROMPT_TEMPLATE = """请分析这张图片,并与OCR识别结果进行对比。

{ocr_summary}

请详细描述:
1. 图片的内容是什么?
2. OCR识别的结果是否与图片内容匹配?
3. 你发现了哪些明显的错误或问题?
4. 图片与OCR结果是否匹配同一份文档?

请用中文详细回答,不需要JSON格式。"""


def _build_ocr_summary(ocr_results, max_items: int = 5, max_text_len: int = 50) -> str:
    """Return a short, newline-terminated text summary of the OCR items.

    Only the first ``max_items`` entries are listed and each entry's text is
    cut to ``max_text_len`` characters so the prompt stays small.
    """
    lines = [f"OCR识别了{len(ocr_results)}个项目:"]
    for index, item in enumerate(ocr_results[:max_items], 1):
        bbox = item.get("bbox", [])
        # ``text`` may be present but null (or non-string) in the JSON;
        # coerce it so slicing cannot raise.
        text = str(item.get("text") or "")[:max_text_len]
        lines.append(f"{index}. 位置{bbox} - 文本: {text}")

    remaining = len(ocr_results) - max_items
    if remaining > 0:
        lines.append(f"... 还有{remaining}个项目")
    return "\n".join(lines) + "\n"


def simple_ocr_verification(
    image_path="工大照片-1.jpg",
    ocr_json_path="demo_54fa7ad0_page_1.json",
    output_path="raw_analysis.txt",
) -> str:
    """Ask a multimodal model to compare an image with its OCR results.

    Configuration is read from the environment variables
    ``YUSYS_MULTIMODAL_API_KEY``, ``YUSYS_MULTIMODAL_API_BASE`` and
    ``YUSYS_MULTIMODAL_ID``.

    Args:
        image_path: Image file to send to the model.
        ocr_json_path: JSON file with the OCR results; the code indexes it as
            a list of dicts and reads the ``bbox`` and ``text`` keys.
        output_path: Text file the analysis report is written to.

    Returns:
        The model's analysis text, or a placeholder message when the
        response carried no content.
    """
    # Read configuration.
    api_key = os.getenv("YUSYS_MULTIMODAL_API_KEY")
    api_base = os.getenv("YUSYS_MULTIMODAL_API_BASE")
    model_id = os.getenv("YUSYS_MULTIMODAL_ID")
    # NOTE(review): "openai/" is presumably a provider prefix on the model id
    # that the endpoint itself does not expect - confirm.
    model_name = model_id.replace("openai/", "") if model_id else ""

    # Read the image and encode it for an inline data URL.
    image_data = base64.b64encode(Path(image_path).read_bytes()).decode("utf-8")
    # Derive the MIME type from the file name; fall back to JPEG, which is
    # what the default image uses.
    mime_type = mimetypes.guess_type(str(image_path))[0] or "image/jpeg"

    # Read the OCR results.
    ocr_results = json.loads(Path(ocr_json_path).read_text(encoding="utf-8"))

    # Build the OCR text summary and the prompt around it.
    ocr_summary = _build_ocr_summary(ocr_results)
    prompt = _PROMPT_TEMPLATE.format(ocr_summary=ocr_summary)

    # Build the chat message: one text part plus the inline image.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{image_data}"},
                },
            ],
        }
    ]

    # Call the API.
    client = OpenAI(api_key=api_key, base_url=api_base)

    print("正在分析图片和OCR结果...")

    response = client.chat.completions.create(
        model=model_name,
        messages=messages,  # type: ignore
        temperature=0.3,
        max_tokens=2048,
    )

    analysis = response.choices[0].message.content or "未获取到分析结果"

    print("\n=== VLM详细分析结果 ===")
    print(analysis)

    # Save the raw analysis together with the inputs that produced it.
    report = "".join(
        [
            f"图片: {image_path}\n",
            f"OCR文件: {ocr_json_path}\n",
            f"模型: {model_name}\n",
            f"OCR结果摘要:\n{ocr_summary}\n\n",
            "=== VLM分析 ===\n",
            analysis,
        ]
    )
    Path(output_path).write_text(report, encoding="utf-8")

    print(f"\n原始分析已保存到: {output_path}")
    return analysis


if __name__ == "__main__":
    simple_ocr_verification()