2 bulan lalu · 15a720768c
--- a/image_edit/config.yaml
+++ b/image_edit/config.yaml
@@ -0,0 +1,235 @@
 
				+# 本地大模型配置文件
			
 
# Model endpoints, keyed by the short alias that `default.model` (and the
# processor's model_name argument) refers to. local_vlm_processor.py passes
# `api_base` and `api_key` to the OpenAI client and sends `model_id` as the
# model of each chat-completion request.
models:
  qwen2_vl:
    name: "Qwen2.5-VL-72B-Instruct-AWQ"
    api_base: "http://10.192.72.12:9991/v1"
    # `${VAR}` is replaced with that environment variable's value at runtime.
    # If the variable is unset, the placeholder text is kept unchanged and a
    # warning is printed.
    api_key: "${YUSYS_MULTIMODAL_API_KEY}"
    model_id: "Qwen2.5-VL-72B-Instruct-AWQ"
    # Used whenever the caller does not pass an explicit override.
    default_params:
      temperature: 0.1
      max_tokens: 4096
      timeout: 180  # seconds

  llava:
    name: "LLaVA-v1.6-34B"
    api_base: "http://localhost:11434/v1"
    # An empty key makes the processor fall back to the literal "dummy-key".
    api_key: ""
    model_id: "llava:34b"
    default_params:
      temperature: 0.1
      max_tokens: 4096
      timeout: 180  # seconds

  cogvlm:
    name: "CogVLM2-19B"
    api_base: "http://localhost:11434/v1"
    # An empty key makes the processor fall back to the literal "dummy-key".
    api_key: ""
    model_id: "cogvlm2:19b"
    default_params:
      temperature: 0.1
      max_tokens: 4096
      timeout: 180  # seconds
			
 
				+
			
 
				+# 提示词模板
			
 
				+prompts:
			
 
				+  photo_analysis:
			
 
				+    name: "照片分析"
			
 
				+    template: |
			
 
				+      请仔细观察这张照片，分析以下内容：
			
 
				+      1. 照片的拍摄环境和背景
			
 
				+      2. 人物的外貌特征和着装
			
 
				+      3. 照片的拍摄时间推测（基于服装、环境等）
			
 
				+      4. 照片的保存状况（是否有破损、褪色等）
			
 
				+      5. 照片的历史价值和意义
			
 
				+      
			
 
				+      请用中文详细描述，分条列出分析结果。
			
 
				+
			
 
				+  ocr_standard:
			
 
				+    name: "标准OCR识别"
			
 
				+    template: |
			
 
				+      You are an AI assistant specialized in converting PDF images to Markdown format. Please follow these instructions for the conversion:
			
 
				+
			
 
				+      1. Text Processing:
			
 
				+      - Accurately recognize all text content in the PDF image without guessing or inferring.
			
 
				+      - Convert the recognized text into Markdown format.
			
 
				+      - Maintain the original document structure, including headings, paragraphs, lists, etc.
			
 
				+      - For financial amounts, use standard half-width characters (e.g., use "," for thousands separator and "." for decimal point)
			
 
				+
			
 
				+      2. Mathematical Formula Processing:
			
 
				+      - Convert all mathematical formulas to LaTeX format.
			
 
				+      - Enclose inline formulas with \( \). For example: This is an inline formula \( E = mc^2 \)
			
 
				+      - Enclose block formulas with \[ \]. For example: \[ \frac{-b \pm \sqrt{b^2 - 4ac}}{2a} \]
			
 
				+
			
 
				+      3. Table Processing:
			
 
				+      - Convert tables to HTML format.
			
 
				+      - Wrap the entire table with <table> and </table>.
			
 
				+      - For financial data in tables, ensure numbers use standard format with half-width commas and periods
			
 
				+
			
 
				+      4. Figure Handling:
			
 
				+      - Ignore figures content in the PDF image. Do not attempt to describe or convert images.
			
 
				+
			
 
				+      5. Output Format:
			
 
				+      - Ensure the output Markdown document has a clear structure with appropriate line breaks between elements.
			
 
				+      - For complex layouts, try to maintain the original document's structure and format as closely as possible.
			
 
				+      - Use standard ASCII characters for punctuation and numbers
			
 
				+
			
 
				+      Please strictly follow these guidelines to ensure accuracy and consistency in the conversion.
			
 
				+
			
 
				+  table_extract:
			
 
				+    name: "表格提取"
			
 
				+    template: |
			
 
				+      请从这张图片中提取所有表格数据，要求：
			
 
				+      
			
 
				+      1. **表格识别**：
			
 
				+         - 准确识别所有表格边界
			
 
				+         - 正确分辨表头和数据行
			
 
				+      
			
 
				+      2. **数据提取**：
			
 
				+         - 逐行逐列提取所有数据
			
 
				+         - 保持数据的原始格式和精度
			
 
				+         - 特别注意数字、金额的准确性
			
 
				+      
			
 
				+      3. **格式输出**：
			
 
				+         - 输出为HTML表格格式
			
 
				+         - 保持表格的原始结构
			
 
				+         - 使用标准的半角字符
			
 
				+      
			
 
				+      4. **质量检查**：
			
 
				+         - 确保没有遗漏任何数据
			
 
				+         - 验证数字格式的正确性
			
 
				+         - 检查表格结构的完整性
			
 
				+
			
 
				+  photo_restore_classroom:
			
 
				+    name: "照片修复（教室背景）"
			
 
				+    template: |
			
 
				+      请对这张老照片进行全面修复和背景替换，具体要求如下：
			
 
				+      
			
 
				+      **修复要求**：
			
 
				+      1. 去除所有折痕、裂痕、污渍和划痕
			
 
				+      2. 补全缺失的细节，提升清晰度
			
 
				+      3. 人物面貌务必保持不变，只进行修复不改变特征
			
 
				+      4. 进行适度的彩色化处理
			
 
				+      
			
 
				+      **背景替换**：
			
 
				+      - 将现有的宿舍背景完全替换为教室场景
			
 
				+      - 后方是黑板（深绿色或深灰色黑板）
			
 
				+      - 前方是课桌（木质课桌，呈棕色）
			
 
				+      - 营造1980-1990年代大学教室的氛围
			
 
				+      - 保持照片的年代感和真实性
			
 
				+      
			
 
				+      **色彩方案**：
			
 
				+      - 人物肤色：健康自然的亚洲人肤色
			
 
				+      - 头发：自然黑色
			
 
				+      - 服装色彩：
			
 
				+        * 左一：米色或卡其色外套，绿色裤子
			
 
				+        * 左二：浅灰色工作服
			
 
				+        * 左三：深蓝色毛衣
			
 
				+        * 右一：深棕色或深绿色外套
			
 
				+      - 教室环境：
			
 
				+        * 黑板：深绿色，略有粉笔痕迹
			
 
				+        * 课桌：深棕色木质纹理
			
 
				+        * 整体光线：自然的教室照明
			
 
				+      
			
 
				+      **技术要求**：
			
 
				+      - 保持人物的相对位置和姿态不变
			
 
				+      - 确保新背景与人物的光影一致
			
 
				+      - 维持照片的复古质感和年代感
			
 
				+      - 背景过渡要自然，避免生硬的拼接感
			
 
				+      
			
 
				+      请生成修复后的照片。
			
 
				+
			
 
				+  photo_restore_advanced:
			
 
				+    name: "高级照片修复"
			
 
				+    template: |
			
 
				+      作为专业的照片修复专家，请对这张珍贵的老照片进行全面修复：
			
 
				+      
			
 
				+      **第一步：损伤修复**
			
 
				+      - 识别并修复所有可见的折痕、裂痕、污渍
			
 
				+      - 去除表面划痕和磨损痕迹
			
 
				+      - 修复边缘破损和不平整部分
			
 
				+      - 消除照片上的灰尘和水渍
			
 
				+      
			
 
				+      **第二步：画质增强**
			
 
				+      - 提升整体清晰度和锐度
			
 
				+      - 增强对比度和层次感
			
 
				+      - 降噪处理，保持细节的同时减少颗粒感
			
 
				+      - 色温校正，消除黄褐色偏色
			
 
				+      
			
 
				+      **第三步：彩色化处理**
			
 
				+      人物特征（从左到右）：
			
 
				+      1. 第一人：健康肤色，黑色头发，米色夹克，绿色裤子
			
 
				+      2. 第二人：健康肤色，黑色头发，眼镜，浅色工作服
			
 
				+      3. 第三人：健康肤色，黑色头发，深色毛衣
			
 
				+      4. 第四人：健康肤色，黑色头发，深色外套
			
 
				+      
			
 
				+      **第四步：背景重构**
			
 
				+      - 移除原有宿舍背景
			
 
				+      - 替换为1980年代大学教室场景：
			
 
				+        * 后方：标准教室黑板（深绿色，有轻微粉笔使用痕迹）
			
 
				+        * 前方：传统木质课桌（深棕色，简约设计）
			
 
				+        * 侧面：教室墙面（浅色，符合当时建筑风格）
			
 
				+        * 照明：自然的教室光线，柔和均匀
			
 
				+      
			
 
				+      **质量标准**：
			
 
				+      - 人物面部特征100%保持原貌
			
 
				+      - 新背景与人物光影完美融合
			
 
				+      - 色彩自然协调，符合年代特征
			
 
				+      - 整体画面清晰，细节丰富
			
 
				+      - 保持照片的历史真实感
			
 
				+      
			
 
				+      请生成高质量的修复照片。
			
 
				+
			
 
				+  photo_colorize_classroom:
			
 
				+    name: "照片上色（教室版）"
			
 
				+    template: |
			
 
				+      请为这张黑白老照片进行专业的彩色化处理，并调整背景：
			
 
				+      
			
 
				+      **彩色化标准**：
			
 
				+      严格按照1980-1990年代中国大学生的真实色彩进行上色：
			
 
				+      
			
 
				+      人物色彩：
			
 
				+      - 肤色：自然健康的东亚人肤色，偏暖但不过分红润
			
 
				+      - 头发：统一为自然黑色，略带光泽
			
 
				+      - 眼镜（第二人）：深色镜框，透明镜片
			
 
				+      
			
 
				+      服装色彩：
			
 
				+      - 左一：浅驼色或米色夹克，深绿色裤子
			
 
				+      - 左二：浅灰蓝色工装，内搭白色或米色衬衫
			
 
				+      - 左三：深蓝色或藏青色毛衣，下身深色裤子
			
 
				+      - 右一：深棕色或军绿色外套
			
 
				+      
			
 
				+      **背景改造**：
			
 
				+      原背景（宿舍）→ 新背景（教室）
			
 
				+      - 后墙：标准教室黑板，深绿色，表面有自然的使用痕迹
			
 
				+      - 前景：木质课桌，深棕色，简洁的1980年代设计风格
			
 
				+      - 环境光：教室的自然照明，明亮但柔和
			
 
				+      - 整体氛围：营造温馨的校园学习环境
			
 
				+      
			
 
				+      **技术要求**：
			
 
				+      1. 保持原有构图和人物姿态
			
 
				+      2. 确保色彩过渡自然，无明显色块
			
 
				+      3. 背景替换要无缝融合
			
 
				+      4. 保持照片的年代质感
			
 
				+      5. 色彩饱和度适中，避免过于鲜艳
			
 
				+      
			
 
				+      请生成彩色化且背景更新的照片。
			
 
				+
			
 
				+  simple_photo_fix:
			
 
				+    name: "简单照片修复"
			
 
				+    template: |
			
 
				+      请帮我修复这张老照片：
			
 
				+      
			
 
				+      1. 去除所有折痕、划痕、污渍
			
 
				+      2. 提升清晰度和对比度
			
 
				+      3. 进行彩色化处理（自然色彩）
			
 
				+      4. 将背景改为教室场景：后面是黑板，前面是课桌
			
 
				+      5. 保持人物面貌不变，只做修复和美化
			
 
				+      6. 整体风格要符合1980年代大学生活
			
 
				+      
			
 
				+      请生成修复后的彩色照片。
			
 
				+
			
 
# Default settings, applied when the caller does not choose explicitly
default:
  # Must be one of the keys under `models`.
  model: "qwen2_vl"
  # Must be one of the keys under `prompts`.
  prompt: "photo_analysis"
  # Whether full-width digits/punctuation in the generated text are converted
  # to half-width. The processor forces this off for image-generation prompts.
  normalize_numbers: false
			
--- a/image_edit/fix_photo.py
+++ b/image_edit/fix_photo.py
@@ -1,220 +0,0 @@
 
				-import os
			
 
				-import requests
			
 
				-import base64
			
 
				-import json
			
 
				-import time
			
 
				-from pathlib import Path
			
 
				-
			
 
				-def fix_old_photo(image_path, output_path="fixed_photo.png"):
			
 
				-    """
			
 
				-    使用Gemini API修复老照片
			
 
				-    
			
 
				-    Args:
			
 
				-        image_path: 输入图片路径
			
 
				-        output_path: 输出图片路径
			
 
				-    """
			
 
				-    # 从环境变量获取API密钥
			
 
				-    api_key = os.getenv("GEMINI_API_KEY")
			
 
				-    if not api_key:
			
 
				-        raise ValueError("未找到GEMINI_API_KEY环境变量")
			
 
				-    
			
 
				-    # 读取图片文件并转换为base64
			
 
				-    try:
			
 
				-        with open(image_path, "rb") as image_file:
			
 
				-            image_data = base64.b64encode(image_file.read()).decode('utf-8')
			
 
				-    except FileNotFoundError:
			
 
				-        raise FileNotFoundError(f"找不到图片文件: {image_path}")
			
 
				-    
			
 
				-    # 获取图片的MIME类型
			
 
				-    file_extension = Path(image_path).suffix.lower()
			
 
				-    mime_type_map = {
			
 
				-        '.jpg': 'image/jpeg',
			
 
				-        '.jpeg': 'image/jpeg',
			
 
				-        '.png': 'image/png',
			
 
				-        '.gif': 'image/gif',
			
 
				-        '.webp': 'image/webp'
			
 
				-    }
			
 
				-    mime_type = mime_type_map.get(file_extension, 'image/jpeg')
			
 
				-    
			
 
				-    # 修复提示词
			
 
				-    prompt = """修复这张受损的老照片，去除所有折痕、裂痕、污渍和划痕，补全缺失的细节，提升清晰度，让画面自然完整。人物的面貌务必保持不变，在修复的基础上进行彩色化处理：人物的肤色柔和自然，毛衣呈现温暖的深色调，肤色健康自然，头发为黑色，衣服保持复古质感；去掉人物手里的烟；背景调整为教室，后面是黑板，前面是课桌"""
			
 
				-    
			
 
				-    # 构建请求数据
			
 
				-    request_data = {
			
 
				-        "contents": [{
			
 
				-            "parts": [
			
 
				-                {"text": prompt},
			
 
				-                {
			
 
				-                    "inline_data": {
			
 
				-                        "mime_type": mime_type,
			
 
				-                        "data": image_data
			
 
				-                    }
			
 
				-                }
			
 
				-            ]
			
 
				-        }],
			
 
				-        "generationConfig": {
			
 
				-            "temperature": 0.7,
			
 
				-            "candidateCount": 1,
			
 
				-            "maxOutputTokens": 2048,
			
 
				-        }
			
 
				-    }
			
 
				-    
			
 
				-    # 尝试不同的模型，避免图像生成模型（它们有更严格的配额限制）
			
 
				-    models = [
			
 
				-        "gemini-1.5-flash-latest",
			
 
				-        "gemini-1.5-flash", 
			
 
				-        "gemini-1.5-pro-latest",
			
 
				-        "gemini-1.5-pro"
			
 
				-    ]
			
 
				-    
			
 
				-    headers = {
			
 
				-        "x-goog-api-key": api_key,
			
 
				-        "Content-Type": "application/json"
			
 
				-    }
			
 
				-    
			
 
				-    for model in models:
			
 
				-        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
			
 
				-        
			
 
				-        try:
			
 
				-            print(f"正在尝试模型: {model}")
			
 
				-            response = requests.post(url, headers=headers, json=request_data, timeout=120)
			
 
				-            
			
 
				-            # 处理429错误（配额超限）
			
 
				-            if response.status_code == 429:
			
 
				-                error_data = response.json()
			
 
				-                print(f"模型 {model} 配额超限，错误信息:")
			
 
				-                print(json.dumps(error_data, indent=2, ensure_ascii=False))
			
 
				-                
			
 
				-                # 检查是否有重试建议
			
 
				-                if "error" in error_data and "details" in error_data["error"]:
			
 
				-                    for detail in error_data["error"]["details"]:
			
 
				-                        if detail.get("@type") == "type.googleapis.com/google.rpc.RetryInfo":
			
 
				-                            retry_delay = detail.get("retryDelay", "30s")
			
 
				-                            print(f"建议等待 {retry_delay} 后重试")
			
 
				-                
			
 
				-                continue
			
 
				-            
			
 
				-            # 处理其他HTTP错误
			
 
				-            if response.status_code != 200:
			
 
				-                print(f"HTTP状态码: {response.status_code}")
			
 
				-                print(f"响应内容: {response.text}")
			
 
				-                continue
			
 
				-            
			
 
				-            # 解析响应
			
 
				-            result = response.json()
			
 
				-            print("API响应成功!")
			
 
				-            
			
 
				-            # 提取文本内容（分析结果）
			
 
				-            if "candidates" in result and len(result["candidates"]) > 0:
			
 
				-                candidate = result["candidates"][0]
			
 
				-                if "content" in candidate and "parts" in candidate["content"]:
			
 
				-                    for part in candidate["content"]["parts"]:
			
 
				-                        if "text" in part:
			
 
				-                            analysis_text = part["text"]
			
 
				-                            print("\n=== 照片分析结果 ===")
			
 
				-                            print(analysis_text)
			
 
				-                            
			
 
				-                            # 保存分析结果到文件
			
 
				-                            analysis_file = output_path.replace('.png', '_analysis.txt')
			
 
				-                            with open(analysis_file, 'w', encoding='utf-8') as f:
			
 
				-                                f.write(analysis_text)
			
 
				-                            print(f"\n分析结果已保存到: {analysis_file}")
			
 
				-                            return True
			
 
				-            
			
 
				-            print("未找到有效的分析内容")
			
 
				-            return False
			
 
				-            
			
 
				-        except requests.exceptions.RequestException as e:
			
 
				-            print(f"模型 {model} 请求错误: {e}")
			
 
				-            continue
			
 
				-        except json.JSONDecodeError as e:
			
 
				-            print(f"模型 {model} JSON解析错误: {e}")
			
 
				-            continue
			
 
				-        except Exception as e:
			
 
				-            print(f"模型 {model} 未知错误: {e}")
			
 
				-            continue
			
 
				-    
			
 
				-    print("所有模型都尝试失败")
			
 
				-    return False
			
 
				-
			
 
				-def check_api_status():
			
 
				-    """
			
 
				-    检查API状态和配额
			
 
				-    """
			
 
				-    api_key = os.getenv("GEMINI_API_KEY")
			
 
				-    if not api_key:
			
 
				-        print("未找到GEMINI_API_KEY环境变量")
			
 
				-        return False
			
 
				-    
			
 
				-    # 简单的测试请求
			
 
				-    test_data = {
			
 
				-        "contents": [{
			
 
				-            "parts": [{"text": "Hello, how are you?"}]
			
 
				-        }]
			
 
				-    }
			
 
				-    
			
 
				-    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
			
 
				-    headers = {
			
 
				-        "x-goog-api-key": api_key,
			
 
				-        "Content-Type": "application/json"
			
 
				-    }
			
 
				-    
			
 
				-    try:
			
 
				-        print("检查API状态...")
			
 
				-        response = requests.post(url, headers=headers, json=test_data, timeout=30)
			
 
				-        
			
 
				-        if response.status_code == 200:
			
 
				-            print("✅ API工作正常")
			
 
				-            return True
			
 
				-        elif response.status_code == 429:
			
 
				-            print("⚠️ API配额超限")
			
 
				-            error_data = response.json()
			
 
				-            if "error" in error_data:
			
 
				-                print(f"错误信息: {error_data['error']['message']}")
			
 
				-            return False
			
 
				-        else:
			
 
				-            print(f"❌ API错误，状态码: {response.status_code}")
			
 
				-            print(f"响应: {response.text}")
			
 
				-            return False
			
 
				-            
			
 
				-    except Exception as e:
			
 
				-        print(f"❌ API测试失败: {e}")
			
 
				-        return False
			
 
				-
			
 
				-def main():
			
 
				-    """主函数"""
			
 
				-    print("=== Gemini 照片分析工具 ===\n")
			
 
				-    
			
 
				-    # 检查API状态
			
 
				-    if not check_api_status():
			
 
				-        print("\n由于API配额限制，当前无法使用图像生成功能。")
			
 
				-        print("建议:")
			
 
				-        print("1. 等待一段时间后重试（通常几分钟到几小时）")
			
 
				-        print("2. 升级到付费版本获得更高的配额")
			
 
				-        print("3. 使用其他AI图像修复工具")
			
 
				-        return
			
 
				-    
			
 
				-    # 输入图片路径
			
 
				-    input_image = "./工大照片-1.jpg"
			
 
				-    output_analysis = "photo_analysis.txt"
			
 
				-    
			
 
				-    # 检查输入文件是否存在
			
 
				-    if not os.path.exists(input_image):
			
 
				-        print(f"错误: 找不到输入图片文件 {input_image}")
			
 
				-        print("请将您的老照片放在当前目录下，或修改input_image变量指向正确的路径")
			
 
				-        return
			
 
				-    
			
 
				-    print(f"正在分析图片: {input_image}")
			
 
				-    
			
 
				-    # 分析照片
			
 
				-    success = fix_old_photo(input_image, output_analysis)
			
 
				-    
			
 
				-    if success:
			
 
				-        print("\n✅ 照片分析完成!")
			
 
				-        print("由于API配额限制，当前只能提供分析报告。")
			
 
				-        print("您可以根据分析结果使用其他工具进行照片修复。")
			
 
				-    else:
			
 
				-        print("\n❌ 照片分析失败")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/image_edit/local_vlm_processor.py
+++ b/image_edit/local_vlm_processor.py
@@ -0,0 +1,598 @@
 
				+import os
			
 
				+import re
			
 
				+import yaml
			
 
				+import base64
			
 
				+import json
			
 
				+import time
			
 
				+import argparse
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, Any, Optional
			
 
				+from openai import OpenAI
			
 
				+from dotenv import load_dotenv
			
 
				+
			
 
				+# 加载环境变量
			
 
				+load_dotenv(override=True)
			
 
				+
			
 
				+class LocalVLMProcessor:
			
 
				+    def __init__(self, config_path: str = "config.yaml"):
			
 
				+        """
			
 
				+        初始化本地VLM处理器
			
 
				+        
			
 
				+        Args:
			
 
				+            config_path: 配置文件路径
			
 
				+        """
			
 
				+        self.config_path = Path(config_path)
			
 
				+        self.config = self._load_config()
			
 
				+        
			
 
				+    def _load_config(self) -> Dict[str, Any]:
			
 
				+        """加载配置文件"""
			
 
				+        if not self.config_path.exists():
			
 
				+            raise FileNotFoundError(f"配置文件不存在: {self.config_path}")
			
 
				+        
			
 
				+        with open(self.config_path, 'r', encoding='utf-8') as f:
			
 
				+            config = yaml.safe_load(f)
			
 
				+        
			
 
				+        return config
			
 
				+    
			
 
				+    def _resolve_env_variable(self, value: str) -> str:
			
 
				+        """
			
 
				+        解析环境变量：将 ${VAR_NAME} 格式替换为实际的环境变量值
			
 
				+        
			
 
				+        Args:
			
 
				+            value: 可能包含环境变量的字符串
			
 
				+            
			
 
				+        Returns:
			
 
				+            解析后的字符串
			
 
				+        """
			
 
				+        if not isinstance(value, str):
			
 
				+            return value
			
 
				+            
			
 
				+        # 匹配 ${VAR_NAME} 格式的环境变量
			
 
				+        pattern = r'\$\{([^}]+)\}'
			
 
				+        
			
 
				+        def replace_env_var(match):
			
 
				+            env_var_name = match.group(1)
			
 
				+            env_value = os.getenv(env_var_name)
			
 
				+            if env_value is None:
			
 
				+                print(f"⚠️ 警告: 环境变量 {env_var_name} 未设置，使用原值")
			
 
				+                return match.group(0)
			
 
				+            return env_value
			
 
				+        
			
 
				+        return re.sub(pattern, replace_env_var, value)
			
 
				+    
			
 
				+    def _is_image_generation_prompt(self, prompt_name: str) -> bool:
			
 
				+        """
			
 
				+        判断是否为图片生成相关的提示词
			
 
				+        
			
 
				+        Args:
			
 
				+            prompt_name: 提示词名称
			
 
				+            
			
 
				+        Returns:
			
 
				+            True if 是图片生成任务
			
 
				+        """
			
 
				+        image_generation_prompts = [
			
 
				+            'photo_restore_classroom',
			
 
				+            'photo_restore_advanced', 
			
 
				+            'photo_colorize_classroom',
			
 
				+            'simple_photo_fix'
			
 
				+        ]
			
 
				+        return prompt_name in image_generation_prompts
			
 
				+    
			
 
				+    def _extract_base64_image(self, response_text: str) -> Optional[str]:
			
 
				+        """
			
 
				+        从响应文本中提取base64编码的图片
			
 
				+        
			
 
				+        Args:
			
 
				+            response_text: API响应文本
			
 
				+            
			
 
				+        Returns:
			
 
				+            base64编码的图片数据，如果没找到返回None
			
 
				+        """
			
 
				+        # 常见的base64图片数据格式
			
 
				+        patterns = [
			
 
				+            r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)',  # data URL格式
			
 
				+            r'base64:([A-Za-z0-9+/=]{100,})',              # base64:前缀
			
 
				+            r'```base64\s*\n([A-Za-z0-9+/=\s]+)\n```',     # markdown代码块
			
 
				+            r'<img[^>]*src="data:image/[^;]+;base64,([A-Za-z0-9+/=]+)"[^>]*>',  # HTML img标签
			
 
				+        ]
			
 
				+        
			
 
				+        for pattern in patterns:
			
 
				+            match = re.search(pattern, response_text, re.MULTILINE | re.DOTALL)
			
 
				+            if match:
			
 
				+                base64_data = match.group(1).replace('\n', '').replace(' ', '')
			
 
				+                if len(base64_data) > 1000:  # 合理的图片大小
			
 
				+                    return base64_data
			
 
				+        
			
 
				+        return None
			
 
				+    
			
 
				+    def list_models(self) -> None:
			
 
				+        """列出所有可用的模型"""
			
 
				+        print("📋 可用模型列表:")
			
 
				+        for model_key, model_config in self.config['models'].items():
			
 
				+            resolved_api_key = self._resolve_env_variable(model_config['api_key'])
			
 
				+            api_key_status = "✅ 已配置" if resolved_api_key else "❌ 未配置"
			
 
				+            
			
 
				+            print(f"  🤖 {model_key}: {model_config['name']}")
			
 
				+            print(f"      API地址: {model_config['api_base']}")
			
 
				+            print(f"      模型ID: {model_config['model_id']}")
			
 
				+            print(f"      API密钥: {api_key_status}")
			
 
				+            print()
			
 
				+    
			
 
				+    def list_prompts(self) -> None:
			
 
				+        """列出所有可用的提示词模板"""
			
 
				+        print("📝 可用提示词模板:")
			
 
				+        for prompt_key, prompt_config in self.config['prompts'].items():
			
 
				+            is_image_gen = self._is_image_generation_prompt(prompt_key)
			
 
				+            task_type = "🖼️ 图片生成" if is_image_gen else "📝 文本生成"
			
 
				+            
			
 
				+            print(f"  💬 {prompt_key}: {prompt_config['name']} ({task_type})")
			
 
				+            # 显示模板的前100个字符
			
 
				+            template_preview = prompt_config['template'][:100].replace('\n', ' ')
			
 
				+            print(f"      预览: {template_preview}...")
			
 
				+            print()
			
 
				+    
			
 
				+    def get_model_config(self, model_name: str) -> Dict[str, Any]:
			
 
				+        """获取模型配置"""
			
 
				+        if model_name not in self.config['models']:
			
 
				+            raise ValueError(f"未找到模型配置: {model_name}，可用模型: {list(self.config['models'].keys())}")
			
 
				+        
			
 
				+        model_config = self.config['models'][model_name].copy()
			
 
				+        
			
 
				+        # 解析环境变量
			
 
				+        model_config['api_key'] = self._resolve_env_variable(model_config['api_key'])
			
 
				+        
			
 
				+        return model_config
			
 
				+    
			
 
				+    def get_prompt_template(self, prompt_name: str) -> str:
			
 
				+        """获取提示词模板"""
			
 
				+        if prompt_name not in self.config['prompts']:
			
 
				+            raise ValueError(f"未找到提示词模板: {prompt_name}，可用模板: {list(self.config['prompts'].keys())}")
			
 
				+        
			
 
				+        return self.config['prompts'][prompt_name]['template']
			
 
				+    
			
 
    def normalize_financial_numbers(self, text: str) -> str:
        """
        Convert full-width digits and punctuation to their half-width forms.

        Covers the digits 0-9 and the characters ``, . : ; ( ) - + %``; both
        the ideographic full stop and the full-width full stop become ``.``.
        Empty or falsy input is returned unchanged.

        Args:
            text: Text to normalize.

        Returns:
            The text with every mapped character replaced.
        """
        if not text:
            return text

        # Every source character maps to exactly one ASCII character and no
        # target is itself a source, so one translate pass gives the same
        # result as replacing the characters one after another.
        table = str.maketrans({
            '０': '0', '１': '1', '２': '2', '３': '3', '４': '4',
            '５': '5', '６': '6', '７': '7', '８': '8', '９': '9',
            '，': ',', '。': '.', '．': '.', '：': ':',
            '；': ';', '（': '(', '）': ')', '－': '-',
            '＋': '+', '％': '%',
        })
        return text.translate(table)
			
 
				+    
			
 
				+    def process_image(self, 
			
 
				+                     image_path: str,
			
 
				+                     model_name: Optional[str] = None,
			
 
				+                     prompt_name: Optional[str] = None,
			
 
				+                     output_dir: str = "./output",
			
 
				+                     temperature: Optional[float] = None,
			
 
				+                     max_tokens: Optional[int] = None,
			
 
				+                     timeout: Optional[int] = None,
			
 
				+                     normalize_numbers: Optional[bool] = None,
			
 
				+                     custom_prompt: Optional[str] = None) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        处理单张图片
			
 
				+        
			
 
				+        Args:
			
 
				+            image_path: 图片路径
			
 
				+            model_name: 模型名称
			
 
				+            prompt_name: 提示词模板名称
			
 
				+            output_dir: 输出目录
			
 
				+            temperature: 生成温度
			
 
				+            max_tokens: 最大token数
			
 
				+            timeout: 超时时间
			
 
				+            normalize_numbers: 是否标准化数字
			
 
				+            custom_prompt: 自定义提示词（优先级高于prompt_name）
			
 
				+        
			
 
				+        Returns:
			
 
				+            处理结果字典
			
 
				+        """
			
 
				+        # 使用默认值或配置值
			
 
				+        model_name = model_name or self.config['default']['model']
			
 
				+        prompt_name = prompt_name or self.config['default']['prompt']
			
 
				+        
			
 
				+        # 判断是否为图片生成任务
			
 
				+        is_image_generation = custom_prompt is None and self._is_image_generation_prompt(prompt_name)
			
 
				+        
			
 
				+        # 图片生成任务默认不进行数字标准化
			
 
				+        if is_image_generation:
			
 
				+            normalize_numbers = False
			
 
				+            print(f"🖼️ 检测到图片生成任务，自动禁用数字标准化")
			
 
				+        else:
			
 
				+            normalize_numbers = normalize_numbers if normalize_numbers is not None else self.config['default']['normalize_numbers']
			
 
				+        
			
 
				+        # 获取模型配置
			
 
				+        model_config = self.get_model_config(model_name)
			
 
				+        
			
 
				+        # 设置参数，优先使用传入的参数
			
 
				+        temperature = temperature if temperature is not None else model_config['default_params']['temperature']
			
 
				+        max_tokens = max_tokens if max_tokens is not None else model_config['default_params']['max_tokens']
			
 
				+        timeout = timeout if timeout is not None else model_config['default_params']['timeout']
			
 
				+        
			
 
				+        # 获取提示词
			
 
				+        if custom_prompt:
			
 
				+            prompt = custom_prompt
			
 
				+            print(f"🎯 使用自定义提示词")
			
 
				+        else:
			
 
				+            prompt = self.get_prompt_template(prompt_name)
			
 
				+            task_type = "图片生成" if is_image_generation else "文本分析"
			
 
				+            print(f"🎯 使用提示词模板: {prompt_name} ({task_type})")
			
 
				+        
			
 
				+        # 读取图片文件并转换为base64
			
 
				+        if not Path(image_path).exists():
			
 
				+            raise FileNotFoundError(f"找不到图片文件: {image_path}")
			
 
				+        
			
 
				+        with open(image_path, "rb") as image_file:
			
 
				+            image_data = base64.b64encode(image_file.read()).decode('utf-8')
			
 
				+        
			
 
				+        # 获取图片的MIME类型
			
 
				+        file_extension = Path(image_path).suffix.lower()
			
 
				+        mime_type_map = {
			
 
				+            '.jpg': 'image/jpeg',
			
 
				+            '.jpeg': 'image/jpeg',
			
 
				+            '.png': 'image/png',
			
 
				+            '.gif': 'image/gif',
			
 
				+            '.webp': 'image/webp'
			
 
				+        }
			
 
				+        mime_type = mime_type_map.get(file_extension, 'image/jpeg')
			
 
				+        
			
 
				+        # 创建OpenAI客户端
			
 
				+        client = OpenAI(
			
 
				+            api_key=model_config['api_key'] or "dummy-key",
			
 
				+            base_url=model_config['api_base']
			
 
				+        )
			
 
				+        
			
 
				+        # 构建消息
			
 
				+        messages = [
			
 
				+            {
			
 
				+                "role": "user",
			
 
				+                "content": [
			
 
				+                    {
			
 
				+                        "type": "text",
			
 
				+                        "text": prompt
			
 
				+                    },
			
 
				+                    {
			
 
				+                        "type": "image_url",
			
 
				+                        "image_url": {
			
 
				+                            "url": f"data:{mime_type};base64,{image_data}"
			
 
				+                        }
			
 
				+                    }
			
 
				+                ]
			
 
				+            }
			
 
				+        ]
			
 
				+        
			
 
				+        # 显示处理信息
			
 
				+        print(f"\n🚀 开始处理图片: {Path(image_path).name}")
			
 
				+        print(f"🤖 使用模型: {model_config['name']} ({model_name})")
			
 
				+        print(f"🌐 API地址: {model_config['api_base']}")
			
 
				+        print(f"🔧 参数配置:")
			
 
				+        print(f"   - 温度: {temperature}")
			
 
				+        print(f"   - 最大Token: {max_tokens}")
			
 
				+        print(f"   - 超时时间: {timeout}秒")
			
 
				+        print(f"   - 数字标准化: {'启用' if normalize_numbers else '禁用'}")
			
 
				+        print(f"   - 任务类型: {'图片生成' if is_image_generation else '文本分析'}")
			
 
				+        
			
 
				+        try:
			
 
				+            # 调用API
			
 
				+            response = client.chat.completions.create(
			
 
				+                model=model_config['model_id'],
			
 
				+                messages=messages,
			
 
				+                temperature=temperature,
			
 
				+                max_tokens=max_tokens,
			
 
				+                timeout=timeout
			
 
				+            )
			
 
				+            
			
 
				+            # 提取响应内容
			
 
				+            generated_text = response.choices[0].message.content
			
 
				+            
			
 
				+            if not generated_text:
			
 
				+                raise Exception("模型没有生成内容")
			
 
				+            
			
 
				+            # 处理图片生成结果
			
 
				+            if is_image_generation:
			
 
				+                # 尝试提取base64图片数据
			
 
				+                base64_image = self._extract_base64_image(generated_text)
			
 
				+                if base64_image:
			
 
				+                    print("🖼️ 检测到生成的图片数据")
			
 
				+                    return self._save_image_results(
			
 
				+                        image_path=image_path,
			
 
				+                        output_dir=output_dir,
			
 
				+                        generated_text=generated_text,
			
 
				+                        base64_image=base64_image,
			
 
				+                        model_name=model_name,
			
 
				+                        prompt_name=prompt_name,
			
 
				+                        model_config=model_config,
			
 
				+                        processing_params={
			
 
				+                            'temperature': temperature,
			
 
				+                            'max_tokens': max_tokens,
			
 
				+                            'timeout': timeout,
			
 
				+                            'normalize_numbers': normalize_numbers,
			
 
				+                            'custom_prompt_used': custom_prompt is not None,
			
 
				+                            'is_image_generation': True
			
 
				+                        }
			
 
				+                    )
			
 
				+                else:
			
 
				+                    print("⚠️ 未检测到图片数据，保存为文本结果")
			
 
				+            
			
 
				+            # 标准化数字格式（如果启用）
			
 
				+            original_text = generated_text
			
 
				+            if normalize_numbers:
			
 
				+                print("🔧 正在标准化数字格式...")
			
 
				+                generated_text = self.normalize_financial_numbers(generated_text)
			
 
				+                
			
 
				+                # 统计标准化的变化
			
 
				+                changes_count = len([1 for o, n in zip(original_text, generated_text) if o != n])
			
 
				+                if changes_count > 0:
			
 
				+                    print(f"✅ 已标准化 {changes_count} 个字符（全角→半角）")
			
 
				+                else:
			
 
				+                    print("ℹ️ 无需标准化（已是标准格式）")
			
 
				+            
			
 
				+            print(f"✅ 成功完成处理!")
			
 
				+            
			
 
				+            # 保存文本结果
			
 
				+            return self._save_text_results(
			
 
				+                image_path=image_path,
			
 
				+                output_dir=output_dir,
			
 
				+                generated_text=generated_text,
			
 
				+                original_text=original_text,
			
 
				+                model_name=model_name,
			
 
				+                prompt_name=prompt_name,
			
 
				+                model_config=model_config,
			
 
				+                processing_params={
			
 
				+                    'temperature': temperature,
			
 
				+                    'max_tokens': max_tokens,
			
 
				+                    'timeout': timeout,
			
 
				+                    'normalize_numbers': normalize_numbers,
			
 
				+                    'custom_prompt_used': custom_prompt is not None,
			
 
				+                    'is_image_generation': is_image_generation
			
 
				+                }
			
 
				+            )
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ 处理失败: {e}")
			
 
				+            raise
			
 
				+    
			
 
				+    def _save_image_results(self, 
			
 
				+                           image_path: str,
			
 
				+                           output_dir: str,
			
 
				+                           generated_text: str,
			
 
				+                           base64_image: str,
			
 
				+                           model_name: str,
			
 
				+                           prompt_name: str,
			
 
				+                           model_config: Dict[str, Any],
			
 
				+                           processing_params: Dict[str, Any]) -> Dict[str, Any]:
			
 
				+        """保存图片生成结果"""
			
 
				+        # 创建输出目录
			
 
				+        output_path = Path(output_dir)
			
 
				+        output_path.mkdir(parents=True, exist_ok=True)
			
 
				+        
			
 
				+        # 生成输出文件名
			
 
				+        base_name = Path(image_path).stem
			
 
				+        timestamp = time.strftime("%Y%m%d_%H%M%S")
			
 
				+        
			
 
				+        # 保存生成的图片
			
 
				+        try:
			
 
				+            image_bytes = base64.b64decode(base64_image)
			
 
				+            image_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}.png"
			
 
				+            
			
 
				+            with open(image_file, 'wb') as f:
			
 
				+                f.write(image_bytes)
			
 
				+            print(f"🖼️ 生成的图片已保存到: {image_file}")
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"❌ 图片保存失败: {e}")
			
 
				+            # 如果图片保存失败，保存为文本
			
 
				+            text_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}.txt"
			
 
				+            with open(text_file, 'w', encoding='utf-8') as f:
			
 
				+                f.write(generated_text)
			
 
				+            print(f"📄 响应内容已保存为文本: {text_file}")
			
 
				+            image_file = text_file
			
 
				+        
			
 
				+        # 保存原始响应文本（包含可能的说明文字）
			
 
				+        if len(generated_text.strip()) > len(base64_image) + 100:  # 如果有额外的说明文字
			
 
				+            description_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}_description.txt"
			
 
				+            with open(description_file, 'w', encoding='utf-8') as f:
			
 
				+                f.write(generated_text)
			
 
				+            print(f"📝 响应说明已保存到: {description_file}")
			
 
				+        
			
 
				+        # 保存元数据
			
 
				+        metadata = {
			
 
				+            "processing_info": {
			
 
				+                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				+                "image_path": Path(image_path).resolve().as_posix(),
			
 
				+                "output_file": image_file.resolve().as_posix(),
			
 
				+                "model_used": model_name,
			
 
				+                "model_config": model_config,
			
 
				+                "prompt_template": prompt_name,
			
 
				+                "processing_params": processing_params,
			
 
				+                "result_type": "image",
			
 
				+                "text_stats": {
			
 
				+                    "response_length": len(generated_text),
			
 
				+                    "has_image_data": True,
			
 
				+                    "base64_length": len(base64_image)
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        metadata_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}_metadata.json"
			
 
				+        with open(metadata_file, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(metadata, f, ensure_ascii=False, indent=2)
			
 
				+        print(f"📊 元数据已保存到: {metadata_file}")
			
 
				+        
			
 
				+        return metadata
			
 
				+    
			
 
				+    def _save_text_results(self, 
			
 
				+                          image_path: str,
			
 
				+                          output_dir: str,
			
 
				+                          generated_text: str,
			
 
				+                          original_text: str,
			
 
				+                          model_name: str,
			
 
				+                          prompt_name: str,
			
 
				+                          model_config: Dict[str, Any],
			
 
				+                          processing_params: Dict[str, Any]) -> Dict[str, Any]:
			
 
				+        """保存文本结果"""
			
 
				+        # 创建输出目录
			
 
				+        output_path = Path(output_dir)
			
 
				+        output_path.mkdir(parents=True, exist_ok=True)
			
 
				+        
			
 
				+        # 生成输出文件名
			
 
				+        base_name = Path(image_path).stem
			
 
				+        
			
 
				+        # 保存主结果文件
			
 
				+        if prompt_name in ['ocr_standard', 'table_extract']:
			
 
				+            # OCR相关任务保存为Markdown格式
			
 
				+            result_file = output_path / f"{base_name}_{model_name}.md"
			
 
				+            with open(result_file, 'w', encoding='utf-8') as f:
			
 
				+                f.write(generated_text)
			
 
				+            print(f"📄 结果已保存到: {result_file}")
			
 
				+        else:
			
 
				+            # 其他任务保存为文本格式
			
 
				+            result_file = output_path / f"{base_name}_{model_name}_{prompt_name}.txt"
			
 
				+            with open(result_file, 'w', encoding='utf-8') as f:
			
 
				+                f.write(generated_text)
			
 
				+            print(f"📄 结果已保存到: {result_file}")
			
 
				+        
			
 
				+        # 如果进行了数字标准化，保存原始版本
			
 
				+        if processing_params['normalize_numbers'] and original_text != generated_text:
			
 
				+            original_file = output_path / f"{base_name}_{model_name}_original.txt"
			
 
				+            with open(original_file, 'w', encoding='utf-8') as f:
			
 
				+                f.write(original_text)
			
 
				+            print(f"📄 原始结果已保存到: {original_file}")
			
 
				+        
			
 
				+        # 保存元数据
			
 
				+        metadata = {
			
 
				+            "processing_info": {
			
 
				+                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				+                "image_path": Path(image_path).resolve().as_posix(),
			
 
				+                "output_file": result_file.resolve().as_posix(),
			
 
				+                "model_used": model_name,
			
 
				+                "model_config": model_config,
			
 
				+                "prompt_template": prompt_name,
			
 
				+                "processing_params": processing_params,
			
 
				+                "result_type": "text",
			
 
				+                "text_stats": {
			
 
				+                    "original_length": len(original_text),
			
 
				+                    "final_length": len(generated_text),
			
 
				+                    "character_changes": len([1 for o, n in zip(original_text, generated_text) if o != n]) if processing_params['normalize_numbers'] else 0
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+        
			
 
				+        metadata_file = output_path / f"{base_name}_{model_name}_metadata.json"
			
 
				+        with open(metadata_file, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(metadata, f, ensure_ascii=False, indent=2)
			
 
				+        print(f"📊 元数据已保存到: {metadata_file}")
			
 
				+        
			
 
				+        return metadata
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主函数"""
			
 
				+    parser = argparse.ArgumentParser(description='本地VLM图片处理工具')
			
 
				+    
			
 
				+    # 基本参数
			
 
				+    parser.add_argument('image_path', nargs='?', help='图片文件路径')
			
 
				+    parser.add_argument('-c', '--config', default='config.yaml', help='配置文件路径')
			
 
				+    parser.add_argument('-o', '--output', default='./output', help='输出目录')
			
 
				+    
			
 
				+    # 模型和提示词选择
			
 
				+    parser.add_argument('-m', '--model', help='模型名称')
			
 
				+    parser.add_argument('-p', '--prompt', help='提示词模板名称')
			
 
				+    parser.add_argument('--custom-prompt', help='自定义提示词（优先级高于-p参数）')
			
 
				+    
			
 
				+    # 处理参数
			
 
				+    parser.add_argument('-t', '--temperature', type=float, help='生成温度')
			
 
				+    parser.add_argument('--max-tokens', type=int, help='最大token数')
			
 
				+    parser.add_argument('--timeout', type=int, help='超时时间（秒）')
			
 
				+    parser.add_argument('--no-normalize', action='store_true', help='禁用数字标准化, 只有提取表格或ocr相关任务才启用')
			
 
				+    
			
 
				+    # 信息查询
			
 
				+    parser.add_argument('--list-models', action='store_true', help='列出所有可用模型')
			
 
				+    parser.add_argument('--list-prompts', action='store_true', help='列出所有提示词模板')
			
 
				+    
			
 
				+    args = parser.parse_args()
			
 
				+    
			
 
				+    try:
			
 
				+        # 初始化处理器
			
 
				+        processor = LocalVLMProcessor(args.config)
			
 
				+        
			
 
				+        # 处理信息查询请求
			
 
				+        if args.list_models:
			
 
				+            processor.list_models()
			
 
				+            return 0
			
 
				+            
			
 
				+        if args.list_prompts:
			
 
				+            processor.list_prompts()
			
 
				+            return 0
			
 
				+        
			
 
				+        # 检查是否提供了图片路径
			
 
				+        if not args.image_path:
			
 
				+            print("❌ 错误: 请提供图片文件路径")
			
 
				+            print("\n使用示例:")
			
 
				+            print("  python local_vlm_processor.py image.jpg")
			
 
				+            print("  python local_vlm_processor.py image.jpg -m qwen2_vl -p photo_analysis")
			
 
				+            print("  python local_vlm_processor.py image.jpg -p simple_photo_fix  # 图片修复")
			
 
				+            print("  python local_vlm_processor.py --list-models")
			
 
				+            print("  python local_vlm_processor.py --list-prompts")
			
 
				+            return 1
			
 
				+        
			
 
				+        # 处理图片
			
 
				+        result = processor.process_image(
			
 
				+            image_path=args.image_path,
			
 
				+            model_name=args.model,
			
 
				+            prompt_name=args.prompt,
			
 
				+            output_dir=args.output,
			
 
				+            temperature=args.temperature,
			
 
				+            max_tokens=args.max_tokens,
			
 
				+            timeout=args.timeout,
			
 
				+            normalize_numbers=not args.no_normalize,
			
 
				+            custom_prompt=args.custom_prompt
			
 
				+        )
			
 
				+        
			
 
				+        print(f"\n🎉 处理完成！")
			
 
				+        print(f"📊 处理统计:")
			
 
				+        
			
 
				+        if result['processing_info']['result_type'] == 'image':
			
 
				+            stats = result['processing_info']['text_stats']
			
 
				+            print(f"   响应长度: {stats['response_length']} 字符")
			
 
				+            print(f"   图片数据: {'包含' if stats['has_image_data'] else '不包含'}")
			
 
				+            if stats['has_image_data']:
			
 
				+                print(f"   Base64长度: {stats['base64_length']} 字符")
			
 
				+        else:
			
 
				+            stats = result['processing_info']['text_stats']
			
 
				+            print(f"   原始长度: {stats['original_length']} 字符")
			
 
				+            print(f"   最终长度: {stats['final_length']} 字符")
			
 
				+            if stats['character_changes'] > 0:
			
 
				+                print(f"   标准化变更: {stats['character_changes']} 字符")
			
 
				+        
			
 
				+        return 0
			
 
				+        
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ 程序执行失败: {e}")
			
 
				+        return 1
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # 如果sys.argv没有被传入参数，则提供默认参数用于测试
			
 
				+    import sys
			
 
				+    if len(sys.argv) == 1:
			
 
				+        sys.argv.extend([
			
 
				+            '../sample_data/工大照片-1.jpg', 
			
 
				+			'-p', 'simple_photo_fix',
			
 
				+            '-o', './output', 
			
 
				+            '--no-normalize'])
			
 
				+
			
 
				+    exit(main())