| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- #!/usr/bin/env python3
- """
- 简化版OCR验证脚本 - 获取原始分析内容
- """
- import os
- import base64
- import json
- from pathlib import Path
- from openai import OpenAI
- from dotenv import load_dotenv
# Load environment variables (via python-dotenv) at import time, so the
# os.getenv() lookups inside simple_ocr_verification() can see them.
load_dotenv()
def _build_ocr_summary(ocr_results, max_items=5, max_text_len=50):
    """Return a short, newline-terminated text summary of the OCR items."""
    lines = [f"OCR识别了{len(ocr_results)}个项目:"]
    for index, item in enumerate(ocr_results[:max_items], 1):
        bbox = item.get("bbox", [])
        # `text` may be missing, null or non-string in the OCR JSON; normalise
        # it to a string before truncating so slicing cannot fail.
        text = str(item.get("text") or "")[:max_text_len]
        lines.append(f"{index}. 位置{bbox} - 文本: {text}")
    remaining = len(ocr_results) - max_items
    if remaining > 0:
        lines.append(f"... 还有{remaining}个项目")
    return "\n".join(lines) + "\n"


def _build_prompt(ocr_summary):
    """Return the free-text (non-JSON) comparison prompt sent to the model."""
    return (
        "请分析这张图片,并与OCR识别结果进行对比。\n"
        f"{ocr_summary}\n"
        "请详细描述:\n"
        "1. 图片的内容是什么?\n"
        "2. OCR识别的结果是否与图片内容匹配?\n"
        "3. 你发现了哪些明显的错误或问题?\n"
        "4. 图片与OCR结果是否匹配同一份文档?\n"
        "请用中文详细回答,不需要JSON格式。"
    )


def simple_ocr_verification(
    image_path: str = "工大照片-1.jpg",
    ocr_json_path: str = "demo_54fa7ad0_page_1.json",
    output_path: str = "raw_analysis.txt",
) -> str:
    """Ask a vision-language model to compare an image with its OCR output.

    The image is sent base64-encoded together with a summary of the first few
    OCR items. The model's free-text answer is printed, written to
    ``output_path`` and returned.

    Configuration is read from the environment:
    ``YUSYS_MULTIMODAL_API_KEY``, ``YUSYS_MULTIMODAL_API_BASE`` and
    ``YUSYS_MULTIMODAL_ID`` (an ``openai/`` marker in the id is removed).

    Args:
        image_path: Path of the image file to analyse (sent as JPEG data URL).
        ocr_json_path: Path of a UTF-8 JSON file holding a list of OCR items,
            each a mapping that may carry ``bbox`` and ``text`` keys.
        output_path: Path of the UTF-8 text report to write.

    Returns:
        The raw analysis text returned by the model, or a fixed placeholder
        message when the response carries no content.

    Raises:
        ValueError: If ``YUSYS_MULTIMODAL_ID`` is not set, or the OCR JSON
            is not a list.
        OSError: If the image or OCR file cannot be read, or the report
            cannot be written.
    """
    # Fail fast on a missing model id: an empty model name would otherwise
    # only be rejected after the files were read and the request was sent.
    model_id = os.getenv("YUSYS_MULTIMODAL_ID")
    if not model_id:
        raise ValueError(
            "Environment variable YUSYS_MULTIMODAL_ID is not set; "
            "cannot determine which model to call."
        )
    model_name = model_id.replace("openai/", "")

    # Key and base URL are passed through unchanged; the client decides how
    # to treat missing values.
    api_key = os.getenv("YUSYS_MULTIMODAL_API_KEY")
    api_base = os.getenv("YUSYS_MULTIMODAL_API_BASE")

    with open(image_path, "rb") as image_file:
        image_data = base64.b64encode(image_file.read()).decode("utf-8")

    with open(ocr_json_path, "r", encoding="utf-8") as ocr_file:
        ocr_results = json.load(ocr_file)
    if not isinstance(ocr_results, list):
        raise ValueError(
            f"Expected a JSON list of OCR items in {ocr_json_path!r}, "
            f"got {type(ocr_results).__name__}."
        )

    ocr_summary = _build_ocr_summary(ocr_results)
    prompt = _build_prompt(ocr_summary)

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_data}"
                    },
                },
            ],
        }
    ]

    client = OpenAI(api_key=api_key, base_url=api_base)

    print("正在分析图片和OCR结果...")
    response = client.chat.completions.create(
        model=model_name,
        messages=messages,  # type: ignore
        temperature=0.3,
        max_tokens=2048,
    )

    # Guard against an empty choices list as well as empty content.
    choices = response.choices
    content = choices[0].message.content if choices else None
    analysis = content or "未获取到分析结果"

    print("\n=== VLM详细分析结果 ===")
    print(analysis)

    # Persist the inputs alongside the raw answer so the report is
    # self-describing.
    with open(output_path, "w", encoding="utf-8") as report:
        report.write(
            f"图片: {image_path}\n"
            f"OCR文件: {ocr_json_path}\n"
            f"模型: {model_name}\n"
            f"OCR结果摘要:\n{ocr_summary}\n\n"
            "=== VLM分析 ===\n"
        )
        report.write(analysis)

    print(f"\n原始分析已保存到: {output_path}")
    return analysis
# Run the verification only when this file is executed as a script, so that
# importing the module does not trigger file reads or an API call.
if __name__ == "__main__":
    simple_ocr_verification()
|