
Remove LocalVLMProcessor class and its associated methods for image processing and model handling

zhch158_admin 2 months ago
parent
commit
e129dc6ca8
3 files changed, 796 additions and 772 deletions
  1. image_edit/config.yaml (+104 −174)
  2. image_edit/image_generator.py (+692 −0)
  3. image_edit/local_vlm_processor.py (+0 −598)

+ 104 - 174
image_edit/config.yaml

@@ -1,10 +1,12 @@
 # 本地大模型配置文件
 models:
+  # VLM模型(用于图片理解和修复)
   qwen2_vl:
     name: "Qwen2.5-VL-72B-Instruct-AWQ"
     api_base: "http://10.192.72.12:9991/v1"
     api_key: "${YUSYS_MULTIMODAL_API_KEY}"
     model_id: "Qwen2.5-VL-72B-Instruct-AWQ"
+    type: "vlm"
     default_params:
       temperature: 0.1
       max_tokens: 4096
@@ -15,6 +17,7 @@ models:
     api_base: "http://localhost:11434/v1"
     api_key: ""
     model_id: "llava:34b"
+    type: "vlm"
     default_params:
       temperature: 0.1
       max_tokens: 4096
@@ -25,15 +28,68 @@ models:
     api_base: "http://localhost:11434/v1"
     api_key: ""
     model_id: "cogvlm2:19b"
+    type: "vlm"
     default_params:
       temperature: 0.1
       max_tokens: 4096
       timeout: 180
 
+  # 图片生成模型
+  dashscope_wanx:
+    name: "通义万相-风格重绘"
+    api_base: "https://dashscope.aliyuncs.com/api/v1/services/aigc/image-generation/generation"
+    api_key: "${DASHSCOPE_API_KEY}"
+    model_id: "wanx-style-repaint-v1"
+    type: "image_generation"
+    generation_type: "style_repaint"  # 风格重绘
+    default_params:
+      style_index: 6
+      timeout: 300
+      poll_interval: 5
+  
+  modelscope_qwen:
+    name: "ModelScope-Qwen文生图"
+    api_base: "https://api-inference.modelscope.cn/v1/images/generations"
+    api_key: "${MODELSCOPE_API_KEY}"
+    model_id: "Qwen/Qwen-Image"
+    type: "image_generation"
+    generation_type: "text_to_image"  # 文生图
+    default_params:
+      timeout: 300
+      poll_interval: 5
+
+  dashscope_flux:
+    name: "通义万相-FLUX"
+    api_base: "https://dashscope.aliyuncs.com/api/v1/services/aigc/text2image/image-synthesis"
+    api_key: "${DASHSCOPE_API_KEY}"
+    model_id: "flux-schnell"
+    type: "image_generation"
+    generation_type: "text_to_image"  # 文生图
+    default_params:
+      size: "1024*1024"
+      timeout: 300
+      poll_interval: 5
+
+  # 新增背景生成模型
+  dashscope_background:
+    name: "通义万相-背景生成"
+    api_base: "https://dashscope.aliyuncs.com/api/v1/services/aigc/background-generation/generation/"
+    api_key: "${DASHSCOPE_API_KEY}"
+    model_id: "wanx-background-generation-v2"
+    type: "image_generation"
+    generation_type: "background_generation"  # 背景生成
+    default_params:
+      model_version: "v3"
+      n: 1
+      timeout: 300
+      poll_interval: 5
+
 # 提示词模板
 prompts:
+  # 原有的VLM提示词...
   photo_analysis:
     name: "照片分析"
+    type: "vlm"
     template: |
       请仔细观察这张照片,分析以下内容:
       1. 照片的拍摄环境和背景
@@ -44,192 +100,66 @@ prompts:
       
       请用中文详细描述,分条列出分析结果。
 
-  ocr_standard:
-    name: "标准OCR识别"
+  # 图片生成提示词
+  photo_style_repaint:
+    name: "照片风格重绘"
+    type: "image_generation"
+    compatible_models: ["dashscope_wanx"]
     template: |
-      You are an AI assistant specialized in converting PDF images to Markdown format. Please follow these instructions for the conversion:
-
-      1. Text Processing:
-      - Accurately recognize all text content in the PDF image without guessing or inferring.
-      - Convert the recognized text into Markdown format.
-      - Maintain the original document structure, including headings, paragraphs, lists, etc.
-      - For financial amounts, use standard half-width characters (e.g., use "," for thousands separator and "." for decimal point)
-
-      2. Mathematical Formula Processing:
-      - Convert all mathematical formulas to LaTeX format.
-      - Enclose inline formulas with \( \). For example: This is an inline formula \( E = mc^2 \)
-      - Enclose block formulas with \\[ \\]. For example: \[ \frac{-b \pm \sqrt{b^2 - 4ac}}{2a} \]
-
-      3. Table Processing:
-      - Convert tables to HTML format.
-      - Wrap the entire table with <table> and </table>.
-      - For financial data in tables, ensure numbers use standard format with half-width commas and periods
+      对输入的照片进行风格重绘,保持人物特征和基本构图,应用指定的艺术风格。
+      风格选项:0=复古漫画, 1=3D童话, 2=二次元, 3=小清新, 4=未来科技, 5=国画古风, 6=油画质感
 
-      4. Figure Handling:
-      - Ignore figures content in the PDF image. Do not attempt to describe or convert images.
-
-      5. Output Format:
-      - Ensure the output Markdown document has a clear structure with appropriate line breaks between elements.
-      - For complex layouts, try to maintain the original document's structure and format as closely as possible.
-      - Use standard ASCII characters for punctuation and numbers
-
-      Please strictly follow these guidelines to ensure accuracy and consistency in the conversion.
-
-  table_extract:
-    name: "表格提取"
+  text_to_image_simple:
+    name: "简单文生图"
+    type: "image_generation"
+    compatible_models: ["modelscope_qwen", "dashscope_flux"]
     template: |
-      请从这张图片中提取所有表格数据,要求:
-      
-      1. **表格识别**:
-         - 准确识别所有表格边界
-         - 正确分辨表头和数据行
-      
-      2. **数据提取**:
-         - 逐行逐列提取所有数据
-         - 保持数据的原始格式和精度
-         - 特别注意数字、金额的准确性
-      
-      3. **格式输出**:
-         - 输出为HTML表格格式
-         - 保持表格的原始结构
-         - 使用标准的半角字符
-      
-      4. **质量检查**:
-         - 确保没有遗漏任何数据
-         - 验证数字格式的正确性
-         - 检查表格结构的完整性
+      根据文本描述生成高质量图片。请提供详细的场景描述、人物特征、光线效果和艺术风格要求。
 
-  photo_restore_classroom:
-    name: "照片修复(教室背景)"
+  photo_restoration:
+    name: "AI照片修复"
+    type: "image_generation"
+    compatible_models: ["dashscope_wanx"]
     template: |
-      请对这张老照片进行全面修复和背景替换,具体要求如下:
-      
-      **修复要求**:
-      1. 去除所有折痕、裂痕、污渍和划痕
-      2. 补全缺失的细节,提升清晰度
-      3. 人物面貌务必保持不变,只进行修复不改变特征
-      4. 进行适度的彩色化处理
-      
-      **背景替换**:
-      - 将现有的宿舍背景完全替换为教室场景
-      - 后方是黑板(深绿色或深灰色黑板)
-      - 前方是课桌(木质课桌,呈棕色)
-      - 营造1980-1990年代大学教室的氛围
-      - 保持照片的年代感和真实性
-      
-      **色彩方案**:
-      - 人物肤色:健康自然的亚洲人肤色
-      - 头发:自然黑色
-      - 服装色彩:
-        * 左一:米色或卡其色外套,绿色裤子
-        * 左二:浅灰色工作服
-        * 左三:深蓝色毛衣
-        * 右一:深棕色或深绿色外套
-      - 教室环境:
-        * 黑板:深绿色,略有粉笔痕迹
-        * 课桌:深棕色木质纹理
-        * 整体光线:自然的教室照明
-      
-      **技术要求**:
-      - 保持人物的相对位置和姿态不变
-      - 确保新背景与人物的光影一致
-      - 维持照片的复古质感和年代感
-      - 背景过渡要自然,避免生硬的拼接感
-      
-      请生成修复后的照片。
+      修复老旧照片:去除折痕、划痕、污渍,提升清晰度,保持原有人物特征。
+      {prompt}
 
-  photo_restore_advanced:
-    name: "高级照片修复"
+  # 新增背景生成提示词
+  background_classroom:
+    name: "教室背景生成"
+    type: "image_generation"
+    compatible_models: ["dashscope_background"]
     template: |
-      作为专业的照片修复专家,请对这张珍贵的老照片进行全面修复:
-      
-      **第一步:损伤修复**
-      - 识别并修复所有可见的折痕、裂痕、污渍
-      - 去除表面划痕和磨损痕迹
-      - 修复边缘破损和不平整部分
-      - 消除照片上的灰尘和水渍
-      
-      **第二步:画质增强**
-      - 提升整体清晰度和锐度
-      - 增强对比度和层次感
-      - 降噪处理,保持细节的同时减少颗粒感
-      - 色温校正,消除黄褐色偏色
+      1980年代中国大学教室场景:后方是深绿色黑板,略有粉笔痕迹,前方是深褐色木质课桌,教室光线明亮柔和,营造温馨的学习氛围
       
-      **第三步:彩色化处理**
-      人物特征(从左到右):
-      1. 第一人:健康肤色,黑色头发,米色夹克,绿色裤子
-      2. 第二人:健康肤色,黑色头发,眼镜,浅色工作服
-      3. 第三人:健康肤色,黑色头发,深色毛衣
-      4. 第四人:健康肤色,黑色头发,深色外套
-      
-      **第四步:背景重构**
-      - 移除原有宿舍背景
-      - 替换为1980年代大学教室场景:
-        * 后方:标准教室黑板(深绿色,有轻微粉笔使用痕迹)
-        * 前方:传统木质课桌(深棕色,简约设计)
-        * 侧面:教室墙面(浅色,符合当时建筑风格)
-        * 照明:自然的教室光线,柔和均匀
-      
-      **质量标准**:
-      - 人物面部特征100%保持原貌
-      - 新背景与人物光影完美融合
-      - 色彩自然协调,符合年代特征
-      - 整体画面清晰,细节丰富
-      - 保持照片的历史真实感
-      
-      请生成高质量的修复照片。
-
-  photo_colorize_classroom:
-    name: "照片上色(教室版)"
+  background_landscape:
+    name: "风景背景生成"
+    type: "image_generation"
+    compatible_models: ["dashscope_background"]
     template: |
-      请为这张黑白老照片进行专业的彩色化处理,并调整背景:
+      {prompt}
       
-      **彩色化标准**:
-      严格按照1980-1990年代中国大学生的真实色彩进行上色:
-      
-      人物色彩:
-      - 肤色:自然健康的东亚人肤色,偏暖但不过分红润
-      - 头发:统一为自然黑色,略带光泽
-      - 眼镜(第二人):深色镜框,透明镜片
-      
-      服装色彩:
-      - 左一:浅驼色或米色夹克,深绿色裤子
-      - 左二:浅灰蓝色工装,内搭白色或米色衬衫
-      - 左三:深蓝色或藏青色毛衣,下身深色裤子
-      - 右一:深棕色或军绿色外套
-      
-      **背景改造**:
-      原背景(宿舍)→ 新背景(教室)
-      - 后墙:标准教室黑板,深绿色,表面有自然的使用痕迹
-      - 前景:木质课桌,深棕色,简洁的1980年代设计风格
-      - 环境光:教室的自然照明,明亮但柔和
-      - 整体氛围:营造温馨的校园学习环境
-      
-      **技术要求**:
-      1. 保持原有构图和人物姿态
-      2. 确保色彩过渡自然,无明显色块
-      3. 背景替换要无缝融合
-      4. 保持照片的年代质感
-      5. 色彩饱和度适中,避免过于鲜艳
-      
-      请生成彩色化且背景更新的照片。
-
-  simple_photo_fix:
-    name: "简单照片修复"
+  background_studio:
+    name: "影棚背景生成"
+    type: "image_generation"
+    compatible_models: ["dashscope_background"]
     template: |
-      请帮我修复这张老照片:
-      
-      1. 去除所有折痕、划痕、污渍
-      2. 提升清晰度和对比度
-      3. 进行彩色化处理(自然色彩)
-      4. 将背景改为教室场景:后面是黑板,前面是课桌
-      5. 保持人物面貌不变,只做修复和美化
-      6. 整体风格要符合1980年代大学生活
-      
-      请生成修复后的彩色照片。
+      专业摄影影棚背景,柔和灯光,纯色或渐变背景,适合人像摄影
+
+# 预设风格配置
+style_presets:
+  dashscope_styles:
+    - {index: 0, name: "复古漫画", description: "复古漫画风格,线条清晰,色彩饱和"}
+    - {index: 1, name: "3D童话", description: "3D童话风格,梦幻可爱,立体感强"}
+    - {index: 2, name: "二次元", description: "二次元动漫风格,日式插画感"}
+    - {index: 3, name: "小清新", description: "小清新风格,色彩柔和,清新自然"}
+    - {index: 4, name: "未来科技", description: "未来科技风格,金属质感,科幻氛围"}
+    - {index: 5, name: "国画古风", description: "中国风水墨画风格,古典雅致"}
+    - {index: 6, name: "油画质感", description: "油画质感,色彩丰富,艺术感强"}
 
 # 默认配置
 default:
-  model: "qwen2_vl"
+  vlm_model: "qwen2_vl"
+  image_gen_model: "dashscope_wanx"
   prompt: "photo_analysis"
-  normalize_numbers: false
+  style_index: -1  # -1表示使用自定义风格, 0-6表示使用对应风格
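The `${DASHSCOPE_API_KEY}`-style placeholders in the `api_key` fields above are resolved from environment variables when the config is loaded (see `_resolve_env_variable` in `image_generator.py` below). A minimal standalone sketch of that resolution, assuming the same `${VAR}` syntax (the helper name `resolve_env` is illustrative):

```python
import os
import re

# Matches ${VAR_NAME} placeholders, as used in the api_key fields above
_ENV_PATTERN = re.compile(r'\$\{([^}]+)\}')

def resolve_env(value: str) -> str:
    """Replace each ${VAR} with the value of the environment variable VAR.
    Unset variables resolve to an empty string."""
    return _ENV_PATTERN.sub(lambda m: os.getenv(m.group(1), ""), value)

os.environ["DASHSCOPE_API_KEY"] = "sk-demo"
print(resolve_env("${DASHSCOPE_API_KEY}"))          # sk-demo
print(resolve_env("Bearer ${DASHSCOPE_API_KEY}"))   # Bearer sk-demo
print(resolve_env("${SOME_UNSET_VAR}"))             # prints an empty line
```

The actual implementation additionally prints a warning when a variable is unset; this sketch keeps only the substitution itself.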

+ 692 - 0
image_edit/image_generator.py

@@ -0,0 +1,692 @@
+import os
+import requests
+import time
+import json
+import yaml
+import base64
+import argparse
+from pathlib import Path
+from typing import Dict, Any, Optional, List
+from PIL import Image
+from io import BytesIO
+from dotenv import load_dotenv
+
+# 加载环境变量
+load_dotenv()
+
+class ImageGenerator:
+    def __init__(self, config_path: str = "config.yaml"):
+        """
+        初始化图片生成器
+        
+        Args:
+            config_path: 配置文件路径
+        """
+        self.config_path = Path(config_path)
+        self.config = self._load_config()
+
+    def _load_config(self) -> Dict[str, Any]:
+        """加载配置文件"""
+        if not self.config_path.exists():
+            raise FileNotFoundError(f"配置文件不存在: {self.config_path}")
+
+        with open(self.config_path, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+
+        return config
+
+    def _resolve_env_variable(self, value: str) -> str:
+        """解析环境变量"""
+        if not isinstance(value, str):
+            return value
+
+        import re
+        pattern = r'\$\{([^}]+)\}'
+
+        def replace_env_var(match):
+            env_var_name = match.group(1)
+            env_value = os.getenv(env_var_name)
+            if env_value is None:
+                print(f"⚠️ 警告: 环境变量 {env_var_name} 未设置")
+                return ""
+            return env_value
+
+        return re.sub(pattern, replace_env_var, value)
+
+    def list_models(self, model_type: str = "image_generation") -> None:
+        """列出指定类型的模型"""
+        print(f"📋 可用的{model_type}模型列表:")
+        for model_key, model_config in self.config['models'].items():
+            if model_config.get('type') == model_type:
+                api_key = self._resolve_env_variable(model_config['api_key'])
+                api_key_status = "✅ 已配置" if api_key else "❌ 未配置"
+
+                print(f"  🎨 {model_key}: {model_config['name']}")
+                print(f"      生成类型: {model_config.get('generation_type', 'N/A')}")
+                print(f"      API地址: {model_config['api_base']}")
+                print(f"      API密钥: {api_key_status}")
+                print()
+
+    def list_styles(self) -> None:
+        """列出可用的风格预设"""
+        print("🎨 可用风格预设:")
+        for style_key, styles in self.config.get('style_presets', {}).items():
+            print(f"\n  📝 {style_key}:")
+            for style in styles:
+                print(f"    {style['index']}: {style['name']} - {style['description']}")
+
+    def list_prompts(self, prompt_type: str = "image_generation") -> None:
+        """列出指定类型的提示词模板"""
+        print(f"📝 可用的{prompt_type}提示词模板:")
+        for prompt_key, prompt_config in self.config.get('prompts', {}).items():
+            if prompt_config.get('type') == prompt_type:
+                print(f"  💬 {prompt_key}: {prompt_config['name']}")
+                
+                # 显示兼容的模型
+                compatible_models = prompt_config.get('compatible_models', [])
+                if compatible_models:
+                    print(f"      兼容模型: {', '.join(compatible_models)}")
+                
+                # 显示模板预览(前100个字符)
+                template_preview = prompt_config.get('template', '')[:100].replace('\n', ' ')
+                print(f"      模板预览: {template_preview}...")
+                print()
+
+    def get_model_config(self, model_name: str) -> Dict[str, Any]:
+        """获取模型配置并解析环境变量"""
+        if model_name not in self.config['models']:
+            raise ValueError(f"未找到模型配置: {model_name}")
+
+        model_config = self.config['models'][model_name].copy()
+        model_config['api_key'] = self._resolve_env_variable(model_config['api_key'])
+
+        return model_config
+
+    def get_prompt_template(self, prompt_name: str) -> str:
+        """获取提示词模板"""
+        if prompt_name not in self.config.get('prompts', {}):
+            raise ValueError(f"未找到提示词模板: {prompt_name},可用模板: {list(self.config.get('prompts', {}).keys())}")
+        
+        return self.config['prompts'][prompt_name]['template']
+
+    def check_prompt_model_compatibility(self, prompt_name: str, model_name: str) -> bool:
+        """检查提示词模板与模型的兼容性"""
+        prompt_config = self.config.get('prompts', {}).get(prompt_name, {})
+        compatible_models = prompt_config.get('compatible_models', [])
+        
+        # 如果没有指定兼容模型,则认为所有模型都兼容
+        if not compatible_models:
+            return True
+            
+        return model_name in compatible_models
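The compatibility rule above treats a missing or empty `compatible_models` list as "compatible with every model". A sketch of the same check against an inline dict standing in for the loaded `prompts:` section of config.yaml:

```python
# Stand-in for the prompts section of config.yaml (illustrative subset)
prompts = {
    "photo_style_repaint": {"compatible_models": ["dashscope_wanx"]},
    "text_to_image_simple": {"compatible_models": ["modelscope_qwen", "dashscope_flux"]},
    "photo_analysis": {},  # no list -> compatible with every model
}

def is_compatible(prompt_name: str, model_name: str) -> bool:
    """Mirror check_prompt_model_compatibility: an empty list means
    'all models are acceptable'."""
    compatible = prompts.get(prompt_name, {}).get("compatible_models", [])
    return not compatible or model_name in compatible

print(is_compatible("photo_style_repaint", "dashscope_wanx"))  # True
print(is_compatible("photo_style_repaint", "dashscope_flux"))  # False
print(is_compatible("photo_analysis", "any_model"))            # True
```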
+
+    def upload_image_to_temp(self, image_path: str, convert_to_rgba: bool = False) -> str:
+        """
+        上传图片到临时存储并转换为指定格式
+        
+        Args:
+            image_path: 图片路径
+            convert_to_rgba: 是否转换为RGBA格式
+        
+        Returns:
+            base64编码的图片数据URL
+        """
+        if not Path(image_path).exists():
+            raise FileNotFoundError(f"找不到图片文件: {image_path}")
+
+        # 使用PIL打开图片
+        with Image.open(image_path) as img:
+            # 如果需要转换为RGBA格式
+            if convert_to_rgba:
+                if img.mode != 'RGBA':
+                    print(f"🔄 将图片从 {img.mode} 模式转换为 RGBA 模式")
+                    # 转换为RGBA模式
+                    if img.mode == 'RGB':
+                        # RGB转RGBA,添加不透明度通道
+                        img = img.convert('RGBA')
+                    elif img.mode == 'L':
+                        # 灰度转RGBA
+                        img = img.convert('RGBA')
+                    elif img.mode == 'P':
+                        # 调色板模式转RGBA
+                        img = img.convert('RGBA')
+                    else:
+                        # 其他模式先转RGB再转RGBA
+                        img = img.convert('RGB').convert('RGBA')
+                    print(f"✅ 图片已转换为 RGBA 模式")
+            
+            # 保存为PNG格式的字节流(PNG支持RGBA)
+            from io import BytesIO
+            img_buffer = BytesIO()
+            
+            # 如果是RGBA模式,保存为PNG;否则保存为JPEG
+            if img.mode == 'RGBA':
+                img.save(img_buffer, format='PNG')
+                mime_type = 'image/png'
+            else:
+                # 对于RGB等模式,转换为RGB再保存为JPEG
+                if img.mode != 'RGB':
+                    img = img.convert('RGB')
+                img.save(img_buffer, format='JPEG', quality=95)
+                mime_type = 'image/jpeg'
+            
+            img_buffer.seek(0)
+            image_data = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
+        
+        return f"data:{mime_type};base64,{image_data}"
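Despite its name, `upload_image_to_temp` does not upload anywhere: it inlines the image as a base64 `data:` URL, which the DashScope endpoints accept in place of a hosted URL. The URL shape can be checked with the stdlib alone; a minimal sketch using raw bytes in place of a real PIL-encoded image (the helper `to_data_url` is illustrative):

```python
import base64

def to_data_url(raw: bytes, mime_type: str) -> str:
    """Wrap raw image bytes as a data: URL, matching the format
    returned by upload_image_to_temp."""
    encoded = base64.b64encode(raw).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"

# PNG magic bytes stand in for a real encoded image
url = to_data_url(b"\x89PNG\r\n\x1a\n", "image/png")
print(url.startswith("data:image/png;base64,"))  # True

# Round-trip: the payload after the comma decodes back to the original bytes
payload = url.split(",", 1)[1]
print(base64.b64decode(payload) == b"\x89PNG\r\n\x1a\n")  # True
```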
+
+    def generate_image_dashscope_style_repaint(self, 
+                                             model_config: Dict[str, Any],
+                                             image_path: str,
+                                             style_index: int = None,
+                                             custom_style_url: str = None,
+                                             prompt_template: str = None) -> Dict[str, Any]:
+        """
+        使用通义万相进行风格重绘
+        注意:通义万相的风格重绘API不支持文本提示词,只能通过style_index或style_ref_url控制风格
+        """
+        headers = {
+            "Authorization": f"Bearer {model_config['api_key']}",
+            "Content-Type": "application/json",
+            "X-DashScope-Async": "enable"
+        }
+
+        # 上传图片(风格重绘不需要RGBA格式)
+        print(f"📤 读取图片: {Path(image_path).name}")
+        image_url = self.upload_image_to_temp(image_path, convert_to_rgba=False)
+
+        # 构建请求体
+        if custom_style_url:
+            # 使用自定义风格
+            body = {
+                "model": model_config['model_id'],
+                "input": {
+                    "image_url": image_url,
+                    "style_ref_url": custom_style_url,
+                    "style_index": -1
+                }
+            }
+            print(f"🎨 使用自定义风格参考: {custom_style_url}")
+        else:
+            # 使用预置风格
+            style_idx = style_index if style_index is not None else model_config['default_params']['style_index']
+            
+            # 如果有提示词模板但没有指定风格索引,尝试根据模板内容智能选择风格
+            if prompt_template and style_index is None:
+                style_idx = self._select_style_from_template(prompt_template)
+                print(f"🤖 根据提示词模板智能选择风格索引: {style_idx}")
+            
+            body = {
+                "model": model_config['model_id'],
+                "input": {
+                    "image_url": image_url,
+                    "style_index": style_idx
+                }
+            }
+
+        # 显示提示词模板信息(仅用于记录,不影响API调用)
+        if prompt_template:
+            print(f"📝 提示词模板内容(仅作为风格选择参考):")
+            print(f"   {prompt_template[:200]}...")
+            print(f"⚠️ 注意: 通义万相风格重绘API不支持文本提示,仅通过风格索引控制效果")
+            
+        # 提交任务
+        print(f"🚀 提交风格重绘任务...")
+        print(f"   风格索引: {body['input'].get('style_index', '自定义')}")
+        
+        response = requests.post(model_config['api_base'], headers=headers, json=body)
+
+        if response.status_code != 200:
+            raise Exception(f"任务提交失败: {response.status_code}, {response.text}")
+
+        task_id = response.json().get('output', {}).get('task_id')
+        if not task_id:
+            raise Exception("未获取到任务ID")
+
+        print(f"✅ 任务提交成功,任务ID: {task_id}")
+
+        # 轮询查询结果
+        return self._poll_dashscope_task(model_config, task_id)
+
+    def generate_image_modelscope(self, 
+                                model_config: Dict[str, Any],
+                                prompt: str,
+                                prompt_template: str = None) -> Dict[str, Any]:
+        """
+        使用ModelScope进行文生图
+        """
+        headers = {
+            "Authorization": f"Bearer {model_config['api_key']}",
+            "Content-Type": "application/json",
+            "X-ModelScope-Async-Mode": "true"
+        }
+
+        # 如果提供了提示词模板,将其与用户提示词结合
+        final_prompt = prompt
+        if prompt_template and prompt_template.strip():
+            print(f"🎯 使用提示词模板优化提示词")
+            # 简单的模板应用:将用户提示词插入到模板中
+            if "{prompt}" in prompt_template:
+                final_prompt = prompt_template.replace("{prompt}", prompt)
+            else:
+                # 如果模板中没有占位符,则将用户提示词追加到模板后
+                final_prompt = f"{prompt_template}\n\n具体要求:{prompt}"
+
+        body = {
+            "model": model_config['model_id'],
+            "prompt": final_prompt
+        }
+
+        print(f"🚀 提交文生图任务...")
+        print(f"   最终提示词: {final_prompt[:100]}...")
+        
+        response = requests.post(model_config['api_base'], headers=headers, json=body)
+
+        if response.status_code != 200:
+            raise Exception(f"任务提交失败: {response.status_code}, {response.text}")
+
+        task_id = response.json().get("task_id")
+        if not task_id:
+            raise Exception("未获取到任务ID")
+
+        print(f"✅ 任务提交成功,任务ID: {task_id}")
+
+        # 轮询查询结果
+        return self._poll_modelscope_task(model_config, task_id)
+
+    def generate_image_dashscope_flux(self,
+                                    model_config: Dict[str, Any],
+                                    prompt: str,
+                                    size: str = None,
+                                    prompt_template: str = None) -> Dict[str, Any]:
+        """
+        使用通义万相FLUX进行文生图
+        """
+        headers = {
+            "Authorization": f"Bearer {model_config['api_key']}",
+            "Content-Type": "application/json",
+            "X-DashScope-Async": "enable"
+        }
+
+        # 如果提供了提示词模板,将其与用户提示词结合
+        final_prompt = prompt
+        if prompt_template and prompt_template.strip():
+            print(f"🎯 使用提示词模板优化提示词")
+            if "{prompt}" in prompt_template:
+                final_prompt = prompt_template.replace("{prompt}", prompt)
+            else:
+                final_prompt = f"{prompt_template}\n\n具体要求:{prompt}"
+
+        body = {
+            "model": model_config['model_id'],
+            "input": {
+                "prompt": final_prompt,
+                "size": size or model_config['default_params']['size']
+            }
+        }
+
+        print(f"🚀 提交FLUX文生图任务...")
+        print(f"   图片尺寸: {body['input']['size']}")
+        print(f"   最终提示词: {final_prompt[:100]}...")
+
+        response = requests.post(model_config['api_base'], headers=headers, json=body)
+
+        if response.status_code != 200:
+            raise Exception(f"任务提交失败: {response.status_code}, {response.text}")
+
+        task_id = response.json().get('output', {}).get('task_id')
+        if not task_id:
+            raise Exception("未获取到任务ID")
+
+        print(f"✅ 任务提交成功,任务ID: {task_id}")
+
+        return self._poll_dashscope_task(model_config, task_id)
+
+    def generate_image_dashscope_background(self,
+                                      model_config: Dict[str, Any],
+                                      image_path: str,
+                                      ref_prompt: str,
+                                      prompt_template: str = None) -> Dict[str, Any]:
+        """
+        使用通义万相进行背景生成
+        """
+        headers = {
+            "Authorization": f"Bearer {model_config['api_key']}",
+            "Content-Type": "application/json",
+            "X-DashScope-Async": "enable"
+        }
+
+        # 上传图片并转换为RGBA格式(背景生成API要求RGBA格式)
+        print(f"📤 读取并处理图片: {Path(image_path).name}")
+        image_url = self.upload_image_to_temp(image_path, convert_to_rgba=True)
+        
+        # 如果提供了提示词模板,将其与用户提示词结合
+        final_prompt = ref_prompt
+        if prompt_template and prompt_template.strip():
+            print(f"🎯 使用提示词模板优化背景描述")
+            if "{prompt}" in prompt_template:
+                final_prompt = prompt_template.replace("{prompt}", ref_prompt)
+            else:
+                final_prompt = f"{prompt_template}\n\n具体要求:{ref_prompt}"
+
+        # 构建请求体
+        body = {
+            "model": model_config['model_id'],
+            "input": {
+                "base_image_url": image_url,
+                "ref_prompt": final_prompt
+            },
+            "parameters": {
+                "model_version": model_config['default_params'].get('model_version', 'v3'),
+                "n": model_config['default_params'].get('n', 1)
+            }
+        }
+
+        # 提交任务
+        print(f"🚀 提交背景生成任务...")
+        print(f"   背景描述: {final_prompt}")
+        print(f"   模型版本: {body['parameters']['model_version']}")
+        print(f"   生成数量: {body['parameters']['n']}")
+        
+        response = requests.post(model_config['api_base'], headers=headers, json=body)
+
+        if response.status_code != 200:
+            raise Exception(f"任务提交失败: {response.status_code}, {response.text}")
+
+        task_id = response.json().get('output', {}).get('task_id')
+        if not task_id:
+            raise Exception("未获取到任务ID")
+
+        print(f"✅ 任务提交成功,任务ID: {task_id}")
+
+        # 轮询查询结果
+        return self._poll_dashscope_task(model_config, task_id)
+
+    def _poll_dashscope_task(self, model_config: Dict[str, Any], task_id: str) -> Dict[str, Any]:
+        """轮询通义万相任务结果"""
+        query_url = f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}"
+        headers = {"Authorization": f"Bearer {model_config['api_key']}"}
+
+        poll_interval = model_config['default_params'].get('poll_interval', 5)
+        timeout = model_config['default_params'].get('timeout', 300)
+        start_time = time.time()
+
+        print("🔍 开始查询任务状态...")
+        while True:
+            if time.time() - start_time > timeout:
+                raise Exception(f"任务超时({timeout}秒)")
+
+            response = requests.get(query_url, headers=headers)
+            if response.status_code != 200:
+                raise Exception(f"查询失败: {response.status_code}, {response.text}")
+
+            response_data = response.json()
+            task_status = response_data.get('output', {}).get('task_status')
+
+            if task_status == 'SUCCEEDED':
+                print("✅ 任务成功完成!")
+                return response_data
+            elif task_status == 'FAILED':
+                error_msg = response_data.get('output', {}).get('message', '未知错误')
+                raise Exception(f"任务失败: {error_msg}")
+            else:
+                print(f"⏳ 任务处理中,当前状态: {task_status}...")
+                time.sleep(poll_interval)
+
+    def _poll_modelscope_task(self, model_config: Dict[str, Any], task_id: str) -> Dict[str, Any]:
+        """轮询ModelScope任务结果"""
+        query_url = f"https://api-inference.modelscope.cn/v1/tasks/{task_id}"
+        headers = {
+            "Authorization": f"Bearer {model_config['api_key']}",
+            "X-ModelScope-Task-Type": "image_generation"
+        }
+
+        poll_interval = model_config['default_params'].get('poll_interval', 5)
+        timeout = model_config['default_params'].get('timeout', 300)
+        start_time = time.time()
+
+        print("🔍 开始查询任务状态...")
+        while True:
+            if time.time() - start_time > timeout:
+                raise Exception(f"任务超时({timeout}秒)")
+
+            response = requests.get(query_url, headers=headers)
+            if response.status_code != 200:
+                raise Exception(f"查询失败: {response.status_code}, {response.text}")
+
+            response_data = response.json()
+            task_status = response_data.get('task_status')
+
+            if task_status == 'SUCCEED':
+                print("✅ 任务成功完成!")
+                return response_data
+            elif task_status == 'FAILED':
+                raise Exception(f"任务失败: {response_data}")
+            else:
+                print(f"⏳ 任务处理中,当前状态: {task_status}...")
+                time.sleep(poll_interval)
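Note that the two pollers above look for different success tokens: DashScope reports `SUCCEEDED` nested under `output`, while ModelScope reports `SUCCEED` at the top level of the response. A small normalization sketch capturing that difference (the function `extract_status` and the `'succeeded'/'failed'/'pending'` vocabulary are illustrative, not part of either API):

```python
def extract_status(response_data: dict, provider: str) -> str:
    """Map provider-specific task-status payloads to a common vocabulary."""
    if provider == "dashscope":
        # DashScope: {"output": {"task_status": "SUCCEEDED" | "FAILED" | ...}}
        status = response_data.get("output", {}).get("task_status", "")
        success_token = "SUCCEEDED"
    else:
        # ModelScope: {"task_status": "SUCCEED" | "FAILED" | ...}
        status = response_data.get("task_status", "")
        success_token = "SUCCEED"
    if status == success_token:
        return "succeeded"
    if status == "FAILED":
        return "failed"
    return "pending"

print(extract_status({"output": {"task_status": "SUCCEEDED"}}, "dashscope"))  # succeeded
print(extract_status({"task_status": "SUCCEED"}, "modelscope"))               # succeeded
print(extract_status({"task_status": "RUNNING"}, "modelscope"))               # pending
```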
+
+    def generate_image(self,
+                      model_name: str,
+                      prompt: str = None,
+                      image_path: str = None,
+                      style_index: int = None,
+                      custom_style_url: str = None,
+                      prompt_template_name: str = None,
+                      output_dir: str = "./output") -> Dict[str, Any]:
+        """
+        统一的图片生成接口
+        """
+        model_config = self.get_model_config(model_name)
+
+        if model_config.get('type') != 'image_generation':
+            raise ValueError(f"模型 {model_name} 不是图片生成模型")
+
+        # 获取提示词模板
+        prompt_template = None
+        if prompt_template_name:
+            # 检查兼容性
+            if not self.check_prompt_model_compatibility(prompt_template_name, model_name):
+                print(f"⚠️ 警告: 提示词模板 {prompt_template_name} 可能与模型 {model_name} 不兼容")
+            
+            prompt_template = self.get_prompt_template(prompt_template_name)
+            print(f"🎯 使用提示词模板: {prompt_template_name}")
+
+        print(f"🎨 使用模型: {model_config['name']}")
+        print(f"🔧 生成类型: {model_config.get('generation_type')}")
+
+        # 根据不同的模型调用对应的生成方法
+        if model_name == "dashscope_wanx":
+            if not image_path:
+                raise ValueError("风格重绘需要提供输入图片")
+            result = self.generate_image_dashscope_style_repaint(
+                model_config, image_path, style_index, custom_style_url, prompt_template
+            )
+        elif model_name == "dashscope_background":
+            if not image_path:
+                raise ValueError("背景生成需要提供输入图片")
+            if not prompt:
+                raise ValueError("背景生成需要提供背景描述")
+            result = self.generate_image_dashscope_background(
+                model_config, image_path, prompt, prompt_template
+            )
+        elif model_name == "modelscope_qwen":
+            if not prompt:
+                raise ValueError("文生图需要提供文本提示")
+            result = self.generate_image_modelscope(model_config, prompt, prompt_template)
+        elif model_name == "dashscope_flux":
+            if not prompt:
+                raise ValueError("FLUX文生图需要提供文本提示")
+            result = self.generate_image_dashscope_flux(model_config, prompt, None, prompt_template)
+        else:
+            raise ValueError(f"不支持的模型: {model_name}")
+
+        # 保存结果
+        return self._save_generated_images(result, model_name, output_dir, prompt_template_name)
+
+    def _save_generated_images(self, 
+                             result: Dict[str, Any], 
+                             model_name: str, 
+                             output_dir: str,
+                             prompt_template_name: str = None) -> Dict[str, Any]:
+        """保存生成的图片"""
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        saved_files = []
+
+        # 根据不同API的响应格式提取图片URL
+        if model_name == "dashscope_wanx" or model_name == "dashscope_flux":
+            # 通义万相格式
+            results = result.get('output', {}).get('results', [])
+            for i, img_result in enumerate(results):
+                img_url = img_result.get('url')
+                if img_url:
+                    # 如果使用了提示词模板,在文件名中体现
+                    template_suffix = f"_{prompt_template_name}" if prompt_template_name else ""
+                    filename = f"{model_name}_{timestamp}{template_suffix}_{i+1}.png"
+                    filepath = output_path / filename
+
+                    # 下载并保存图片
+                    img_response = requests.get(img_url)
+                    if img_response.status_code == 200:
+                        image = Image.open(BytesIO(img_response.content))
+                        image.save(filepath)
+                        saved_files.append(filepath)
+                        print(f"🖼️ 图片已保存: {filepath}")
+                    else:
+                        print(f"❌ 下载图片失败: {img_url}")
+
+        elif model_name == "modelscope_qwen":
+            # ModelScope格式
+            output_images = result.get('output_images', [])
+            for i, img_url in enumerate(output_images):
+                template_suffix = f"_{prompt_template_name}" if prompt_template_name else ""
+                filename = f"{model_name}_{timestamp}{template_suffix}_{i+1}.png"
+                filepath = output_path / filename
+
+                img_response = requests.get(img_url)
+                if img_response.status_code == 200:
+                    image = Image.open(BytesIO(img_response.content))
+                    image.save(filepath)
+                    saved_files.append(filepath)
+                    print(f"🖼️ 图片已保存: {filepath}")
+                else:
+                    print(f"❌ 下载图片失败: {img_url}")
+
+        # 保存元数据
+        metadata = {
+            "generation_info": {
+                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+                "model_used": model_name,
+                "prompt_template_used": prompt_template_name,
+                "saved_files": [str(f) for f in saved_files],
+                "api_response": result
+            }
+        }
+
+        template_suffix = f"_{prompt_template_name}" if prompt_template_name else ""
+        metadata_file = output_path / f"{model_name}_{timestamp}{template_suffix}_metadata.json"
+        with open(metadata_file, 'w', encoding='utf-8') as f:
+            json.dump(metadata, f, ensure_ascii=False, indent=2)
+
+        print(f"📊 元数据已保存: {metadata_file}")
+
+        return metadata
+
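The per-API branching above can be isolated into a small helper. A minimal sketch, assuming the response shapes implied by the two branches of `_save_generated_images` (the `extract_image_urls` name is hypothetical, not part of the script):

```python
def extract_image_urls(model_name, result):
    """收集生成接口返回的图片URL(假设的辅助函数)。

    DashScope 响应将URL嵌套在 output.results[*].url 中,
    而 ModelScope 直接返回 output_images 列表。
    """
    if model_name in ("dashscope_wanx", "dashscope_flux"):
        results = result.get("output", {}).get("results", [])
        return [r["url"] for r in results if r.get("url")]
    if model_name == "modelscope_qwen":
        return list(result.get("output_images", []))
    return []
```

Keeping the URL extraction separate from the download/save loop would let both branches share one download path.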
+
+def main():
+    """主函数"""
+    parser = argparse.ArgumentParser(description='AI图片生成工具')
+    
+    # 基本参数
+    parser.add_argument('-c', '--config', default='config.yaml', help='配置文件路径')
+    parser.add_argument('-o', '--output', default='./output', help='输出目录')
+    
+    # 模型选择
+    parser.add_argument('-m', '--model', help='模型名称')
+    
+    # 生成参数
+    parser.add_argument('-p', '--prompt', help='文本提示(用于文生图)')
+    parser.add_argument('-i', '--image', help='输入图片路径(用于风格重绘)')
+    parser.add_argument('-s', '--style', type=int, help='风格索引(0-6)')
+    parser.add_argument('--style-ref', help='自定义风格参考图片URL')
+    
+    # 提示词模板
+    parser.add_argument('-t', '--template', help='提示词模板名称')
+    
+    # 信息查询
+    parser.add_argument('--list-models', action='store_true', help='列出所有可用的图片生成模型')
+    parser.add_argument('--list-styles', action='store_true', help='列出所有可用风格')
+    parser.add_argument('--list-prompts', action='store_true', help='列出所有可用的提示词模板')
+    
+    args = parser.parse_args()
+    
+    try:
+        generator = ImageGenerator(args.config)
+        
+        # 处理信息查询
+        if args.list_models:
+            generator.list_models("image_generation")
+            return 0
+            
+        if args.list_styles:
+            generator.list_styles()
+            return 0
+
+        if args.list_prompts:
+            generator.list_prompts("image_generation")
+            return 0
+        
+        # 检查必要参数
+        if not args.model:
+            print("❌ 错误: 请指定模型名称")
+            print("\n使用示例:")
+            print("  # 风格重绘")
+            print("  python image_generator.py -m dashscope_wanx -i photo.jpg -s 3")
+            print("  # 使用提示词模板进行风格重绘")
+            print("  python image_generator.py -m dashscope_wanx -i photo.jpg -t photo_restoration")
+            print("  # 文生图")
+            print("  python image_generator.py -m modelscope_qwen -p '一只可爱的金色小猫'")
+            print("  # 使用提示词模板进行文生图")
+            print("  python image_generator.py -m modelscope_qwen -p '金色小猫' -t text_to_image_simple")
+            print("  # 查看信息")
+            print("  python image_generator.py --list-models")
+            print("  python image_generator.py --list-prompts")
+            return 1
+        
+        # 生成图片
+        result = generator.generate_image(
+            model_name=args.model,
+            prompt=args.prompt,
+            image_path=args.image,
+            style_index=args.style,
+            custom_style_url=args.style_ref,
+            prompt_template_name=args.template,
+            output_dir=args.output
+        )
+        
+        print(f"\n🎉 图片生成完成!")
+        saved_files = result.get('generation_info', {}).get('saved_files', [])
+        print(f"📊 生成统计: 共保存 {len(saved_files)} 张图片")
+        
+        return 0
+        
+    except Exception as e:
+        print(f"❌ 程序执行失败: {e}")
+        return 1
+
+
+if __name__ == "__main__":
+    # 调试用的默认参数
+    import sys
+    if len(sys.argv) == 1:
+        sys.argv.extend([
+            '-m', 'dashscope_background', 
+            '-i', '../sample_data/工大照片-1.jpg', 
+            '-t', 'background_studio',  # 使用提示词模板
+            '-p', '温馨的书房环境',  # 文生图提示词
+            '-o', './output'
+        ])
+    sys.exit(main())
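The file-naming scheme used when saving results above (`<model>_<YYYYmmdd_HHMMSS>[_<template>]_<n>.png`) can be sketched as a standalone helper; `build_output_name` is a hypothetical name introduced here for illustration:

```python
import time

def build_output_name(model_name, template=None, index=0, timestamp=None):
    """按脚本的命名约定生成输出文件名:
    <model>_<YYYYmmdd_HHMMSS>[_<template>]_<n>.png
    """
    ts = timestamp or time.strftime("%Y%m%d_%H%M%S")
    suffix = f"_{template}" if template else ""
    return f"{model_name}_{ts}{suffix}_{index + 1}.png"
```

Sharing one timestamp per run (as the script does by computing it once) keeps images and the metadata JSON grouped together.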

+ 0 - 598
image_edit/local_vlm_processor.py

@@ -1,598 +0,0 @@
-import os
-import re
-import yaml
-import base64
-import json
-import time
-import argparse
-from pathlib import Path
-from typing import Dict, Any, Optional
-from openai import OpenAI
-from dotenv import load_dotenv
-
-# 加载环境变量
-load_dotenv(override=True)
-
-class LocalVLMProcessor:
-    def __init__(self, config_path: str = "config.yaml"):
-        """
-        初始化本地VLM处理器
-        
-        Args:
-            config_path: 配置文件路径
-        """
-        self.config_path = Path(config_path)
-        self.config = self._load_config()
-        
-    def _load_config(self) -> Dict[str, Any]:
-        """加载配置文件"""
-        if not self.config_path.exists():
-            raise FileNotFoundError(f"配置文件不存在: {self.config_path}")
-        
-        with open(self.config_path, 'r', encoding='utf-8') as f:
-            config = yaml.safe_load(f)
-        
-        return config
-    
-    def _resolve_env_variable(self, value: str) -> str:
-        """
-        解析环境变量:将 ${VAR_NAME} 格式替换为实际的环境变量值
-        
-        Args:
-            value: 可能包含环境变量的字符串
-            
-        Returns:
-            解析后的字符串
-        """
-        if not isinstance(value, str):
-            return value
-            
-        # 匹配 ${VAR_NAME} 格式的环境变量
-        pattern = r'\$\{([^}]+)\}'
-        
-        def replace_env_var(match):
-            env_var_name = match.group(1)
-            env_value = os.getenv(env_var_name)
-            if env_value is None:
-                print(f"⚠️ 警告: 环境变量 {env_var_name} 未设置,使用原值")
-                return match.group(0)
-            return env_value
-        
-        return re.sub(pattern, replace_env_var, value)
-    
-    def _is_image_generation_prompt(self, prompt_name: str) -> bool:
-        """
-        判断是否为图片生成相关的提示词
-        
-        Args:
-            prompt_name: 提示词名称
-            
-        Returns:
-            True if 是图片生成任务
-        """
-        image_generation_prompts = [
-            'photo_restore_classroom',
-            'photo_restore_advanced', 
-            'photo_colorize_classroom',
-            'simple_photo_fix'
-        ]
-        return prompt_name in image_generation_prompts
-    
-    def _extract_base64_image(self, response_text: str) -> Optional[str]:
-        """
-        从响应文本中提取base64编码的图片
-        
-        Args:
-            response_text: API响应文本
-            
-        Returns:
-            base64编码的图片数据,如果没找到返回None
-        """
-        # 常见的base64图片数据格式
-        patterns = [
-            r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)',  # data URL格式
-            r'base64:([A-Za-z0-9+/=]{100,})',              # base64:前缀
-            r'```base64\s*\n([A-Za-z0-9+/=\s]+)\n```',     # markdown代码块
-            r'<img[^>]*src="data:image/[^;]+;base64,([A-Za-z0-9+/=]+)"[^>]*>',  # HTML img标签
-        ]
-        
-        for pattern in patterns:
-            match = re.search(pattern, response_text, re.MULTILINE | re.DOTALL)
-            if match:
-                base64_data = match.group(1).replace('\n', '').replace(' ', '')
-                if len(base64_data) > 1000:  # 合理的图片大小
-                    return base64_data
-        
-        return None
-    
-    def list_models(self) -> None:
-        """列出所有可用的模型"""
-        print("📋 可用模型列表:")
-        for model_key, model_config in self.config['models'].items():
-            resolved_api_key = self._resolve_env_variable(model_config['api_key'])
-            api_key_status = "✅ 已配置" if resolved_api_key else "❌ 未配置"
-            
-            print(f"  🤖 {model_key}: {model_config['name']}")
-            print(f"      API地址: {model_config['api_base']}")
-            print(f"      模型ID: {model_config['model_id']}")
-            print(f"      API密钥: {api_key_status}")
-            print()
-    
-    def list_prompts(self) -> None:
-        """列出所有可用的提示词模板"""
-        print("📝 可用提示词模板:")
-        for prompt_key, prompt_config in self.config['prompts'].items():
-            is_image_gen = self._is_image_generation_prompt(prompt_key)
-            task_type = "🖼️ 图片生成" if is_image_gen else "📝 文本生成"
-            
-            print(f"  💬 {prompt_key}: {prompt_config['name']} ({task_type})")
-            # 显示模板的前100个字符
-            template_preview = prompt_config['template'][:100].replace('\n', ' ')
-            print(f"      预览: {template_preview}...")
-            print()
-    
-    def get_model_config(self, model_name: str) -> Dict[str, Any]:
-        """获取模型配置"""
-        if model_name not in self.config['models']:
-            raise ValueError(f"未找到模型配置: {model_name},可用模型: {list(self.config['models'].keys())}")
-        
-        model_config = self.config['models'][model_name].copy()
-        
-        # 解析环境变量
-        model_config['api_key'] = self._resolve_env_variable(model_config['api_key'])
-        
-        return model_config
-    
-    def get_prompt_template(self, prompt_name: str) -> str:
-        """获取提示词模板"""
-        if prompt_name not in self.config['prompts']:
-            raise ValueError(f"未找到提示词模板: {prompt_name},可用模板: {list(self.config['prompts'].keys())}")
-        
-        return self.config['prompts'][prompt_name]['template']
-    
-    def normalize_financial_numbers(self, text: str) -> str:
-        """
-        标准化财务数字:将全角字符转换为半角字符
-        """
-        if not text:
-            return text
-        
-        # 定义全角到半角的映射
-        fullwidth_to_halfwidth = {
-            '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
-            '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
-            ',': ',', '。': '.', '.': '.', ':': ':',
-            ';': ';', '(': '(', ')': ')', '-': '-',
-            '+': '+', '%': '%',
-        }
-        
-        # 执行字符替换
-        normalized_text = text
-        for fullwidth, halfwidth in fullwidth_to_halfwidth.items():
-            normalized_text = normalized_text.replace(fullwidth, halfwidth)
-        
-        return normalized_text
-    
-    def process_image(self, 
-                     image_path: str,
-                     model_name: Optional[str] = None,
-                     prompt_name: Optional[str] = None,
-                     output_dir: str = "./output",
-                     temperature: Optional[float] = None,
-                     max_tokens: Optional[int] = None,
-                     timeout: Optional[int] = None,
-                     normalize_numbers: Optional[bool] = None,
-                     custom_prompt: Optional[str] = None) -> Dict[str, Any]:
-        """
-        处理单张图片
-        
-        Args:
-            image_path: 图片路径
-            model_name: 模型名称
-            prompt_name: 提示词模板名称
-            output_dir: 输出目录
-            temperature: 生成温度
-            max_tokens: 最大token数
-            timeout: 超时时间
-            normalize_numbers: 是否标准化数字
-            custom_prompt: 自定义提示词(优先级高于prompt_name)
-        
-        Returns:
-            处理结果字典
-        """
-        # 使用默认值或配置值
-        model_name = model_name or self.config['default']['model']
-        prompt_name = prompt_name or self.config['default']['prompt']
-        
-        # 判断是否为图片生成任务
-        is_image_generation = custom_prompt is None and self._is_image_generation_prompt(prompt_name)
-        
-        # 图片生成任务默认不进行数字标准化
-        if is_image_generation:
-            normalize_numbers = False
-            print(f"🖼️ 检测到图片生成任务,自动禁用数字标准化")
-        else:
-            normalize_numbers = normalize_numbers if normalize_numbers is not None else self.config['default']['normalize_numbers']
-        
-        # 获取模型配置
-        model_config = self.get_model_config(model_name)
-        
-        # 设置参数,优先使用传入的参数
-        temperature = temperature if temperature is not None else model_config['default_params']['temperature']
-        max_tokens = max_tokens if max_tokens is not None else model_config['default_params']['max_tokens']
-        timeout = timeout if timeout is not None else model_config['default_params']['timeout']
-        
-        # 获取提示词
-        if custom_prompt:
-            prompt = custom_prompt
-            print(f"🎯 使用自定义提示词")
-        else:
-            prompt = self.get_prompt_template(prompt_name)
-            task_type = "图片生成" if is_image_generation else "文本分析"
-            print(f"🎯 使用提示词模板: {prompt_name} ({task_type})")
-        
-        # 读取图片文件并转换为base64
-        if not Path(image_path).exists():
-            raise FileNotFoundError(f"找不到图片文件: {image_path}")
-        
-        with open(image_path, "rb") as image_file:
-            image_data = base64.b64encode(image_file.read()).decode('utf-8')
-        
-        # 获取图片的MIME类型
-        file_extension = Path(image_path).suffix.lower()
-        mime_type_map = {
-            '.jpg': 'image/jpeg',
-            '.jpeg': 'image/jpeg',
-            '.png': 'image/png',
-            '.gif': 'image/gif',
-            '.webp': 'image/webp'
-        }
-        mime_type = mime_type_map.get(file_extension, 'image/jpeg')
-        
-        # 创建OpenAI客户端
-        client = OpenAI(
-            api_key=model_config['api_key'] or "dummy-key",
-            base_url=model_config['api_base']
-        )
-        
-        # 构建消息
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": prompt
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:{mime_type};base64,{image_data}"
-                        }
-                    }
-                ]
-            }
-        ]
-        
-        # 显示处理信息
-        print(f"\n🚀 开始处理图片: {Path(image_path).name}")
-        print(f"🤖 使用模型: {model_config['name']} ({model_name})")
-        print(f"🌐 API地址: {model_config['api_base']}")
-        print(f"🔧 参数配置:")
-        print(f"   - 温度: {temperature}")
-        print(f"   - 最大Token: {max_tokens}")
-        print(f"   - 超时时间: {timeout}秒")
-        print(f"   - 数字标准化: {'启用' if normalize_numbers else '禁用'}")
-        print(f"   - 任务类型: {'图片生成' if is_image_generation else '文本分析'}")
-        
-        try:
-            # 调用API
-            response = client.chat.completions.create(
-                model=model_config['model_id'],
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                timeout=timeout
-            )
-            
-            # 提取响应内容
-            generated_text = response.choices[0].message.content
-            
-            if not generated_text:
-                raise Exception("模型没有生成内容")
-            
-            # 处理图片生成结果
-            if is_image_generation:
-                # 尝试提取base64图片数据
-                base64_image = self._extract_base64_image(generated_text)
-                if base64_image:
-                    print("🖼️ 检测到生成的图片数据")
-                    return self._save_image_results(
-                        image_path=image_path,
-                        output_dir=output_dir,
-                        generated_text=generated_text,
-                        base64_image=base64_image,
-                        model_name=model_name,
-                        prompt_name=prompt_name,
-                        model_config=model_config,
-                        processing_params={
-                            'temperature': temperature,
-                            'max_tokens': max_tokens,
-                            'timeout': timeout,
-                            'normalize_numbers': normalize_numbers,
-                            'custom_prompt_used': custom_prompt is not None,
-                            'is_image_generation': True
-                        }
-                    )
-                else:
-                    print("⚠️ 未检测到图片数据,保存为文本结果")
-            
-            # 标准化数字格式(如果启用)
-            original_text = generated_text
-            if normalize_numbers:
-                print("🔧 正在标准化数字格式...")
-                generated_text = self.normalize_financial_numbers(generated_text)
-                
-                # 统计标准化的变化
-                changes_count = len([1 for o, n in zip(original_text, generated_text) if o != n])
-                if changes_count > 0:
-                    print(f"✅ 已标准化 {changes_count} 个字符(全角→半角)")
-                else:
-                    print("ℹ️ 无需标准化(已是标准格式)")
-            
-            print(f"✅ 成功完成处理!")
-            
-            # 保存文本结果
-            return self._save_text_results(
-                image_path=image_path,
-                output_dir=output_dir,
-                generated_text=generated_text,
-                original_text=original_text,
-                model_name=model_name,
-                prompt_name=prompt_name,
-                model_config=model_config,
-                processing_params={
-                    'temperature': temperature,
-                    'max_tokens': max_tokens,
-                    'timeout': timeout,
-                    'normalize_numbers': normalize_numbers,
-                    'custom_prompt_used': custom_prompt is not None,
-                    'is_image_generation': is_image_generation
-                }
-            )
-            
-        except Exception as e:
-            print(f"❌ 处理失败: {e}")
-            raise
-    
-    def _save_image_results(self, 
-                           image_path: str,
-                           output_dir: str,
-                           generated_text: str,
-                           base64_image: str,
-                           model_name: str,
-                           prompt_name: str,
-                           model_config: Dict[str, Any],
-                           processing_params: Dict[str, Any]) -> Dict[str, Any]:
-        """保存图片生成结果"""
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-        
-        # 生成输出文件名
-        base_name = Path(image_path).stem
-        timestamp = time.strftime("%Y%m%d_%H%M%S")
-        
-        # 保存生成的图片
-        try:
-            image_bytes = base64.b64decode(base64_image)
-            image_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}.png"
-            
-            with open(image_file, 'wb') as f:
-                f.write(image_bytes)
-            print(f"🖼️ 生成的图片已保存到: {image_file}")
-            
-        except Exception as e:
-            print(f"❌ 图片保存失败: {e}")
-            # 如果图片保存失败,保存为文本
-            text_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}.txt"
-            with open(text_file, 'w', encoding='utf-8') as f:
-                f.write(generated_text)
-            print(f"📄 响应内容已保存为文本: {text_file}")
-            image_file = text_file
-        
-        # 保存原始响应文本(包含可能的说明文字)
-        if len(generated_text.strip()) > len(base64_image) + 100:  # 如果有额外的说明文字
-            description_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}_description.txt"
-            with open(description_file, 'w', encoding='utf-8') as f:
-                f.write(generated_text)
-            print(f"📝 响应说明已保存到: {description_file}")
-        
-        # 保存元数据
-        metadata = {
-            "processing_info": {
-                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
-                "image_path": Path(image_path).resolve().as_posix(),
-                "output_file": image_file.resolve().as_posix(),
-                "model_used": model_name,
-                "model_config": model_config,
-                "prompt_template": prompt_name,
-                "processing_params": processing_params,
-                "result_type": "image",
-                "text_stats": {
-                    "response_length": len(generated_text),
-                    "has_image_data": True,
-                    "base64_length": len(base64_image)
-                }
-            }
-        }
-        
-        metadata_file = output_path / f"{base_name}_{model_name}_{prompt_name}_{timestamp}_metadata.json"
-        with open(metadata_file, 'w', encoding='utf-8') as f:
-            json.dump(metadata, f, ensure_ascii=False, indent=2)
-        print(f"📊 元数据已保存到: {metadata_file}")
-        
-        return metadata
-    
-    def _save_text_results(self, 
-                          image_path: str,
-                          output_dir: str,
-                          generated_text: str,
-                          original_text: str,
-                          model_name: str,
-                          prompt_name: str,
-                          model_config: Dict[str, Any],
-                          processing_params: Dict[str, Any]) -> Dict[str, Any]:
-        """保存文本结果"""
-        # 创建输出目录
-        output_path = Path(output_dir)
-        output_path.mkdir(parents=True, exist_ok=True)
-        
-        # 生成输出文件名
-        base_name = Path(image_path).stem
-        
-        # 保存主结果文件
-        if prompt_name in ['ocr_standard', 'table_extract']:
-            # OCR相关任务保存为Markdown格式
-            result_file = output_path / f"{base_name}_{model_name}.md"
-            with open(result_file, 'w', encoding='utf-8') as f:
-                f.write(generated_text)
-            print(f"📄 结果已保存到: {result_file}")
-        else:
-            # 其他任务保存为文本格式
-            result_file = output_path / f"{base_name}_{model_name}_{prompt_name}.txt"
-            with open(result_file, 'w', encoding='utf-8') as f:
-                f.write(generated_text)
-            print(f"📄 结果已保存到: {result_file}")
-        
-        # 如果进行了数字标准化,保存原始版本
-        if processing_params['normalize_numbers'] and original_text != generated_text:
-            original_file = output_path / f"{base_name}_{model_name}_original.txt"
-            with open(original_file, 'w', encoding='utf-8') as f:
-                f.write(original_text)
-            print(f"📄 原始结果已保存到: {original_file}")
-        
-        # 保存元数据
-        metadata = {
-            "processing_info": {
-                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
-                "image_path": Path(image_path).resolve().as_posix(),
-                "output_file": result_file.resolve().as_posix(),
-                "model_used": model_name,
-                "model_config": model_config,
-                "prompt_template": prompt_name,
-                "processing_params": processing_params,
-                "result_type": "text",
-                "text_stats": {
-                    "original_length": len(original_text),
-                    "final_length": len(generated_text),
-                    "character_changes": len([1 for o, n in zip(original_text, generated_text) if o != n]) if processing_params['normalize_numbers'] else 0
-                }
-            }
-        }
-        
-        metadata_file = output_path / f"{base_name}_{model_name}_metadata.json"
-        with open(metadata_file, 'w', encoding='utf-8') as f:
-            json.dump(metadata, f, ensure_ascii=False, indent=2)
-        print(f"📊 元数据已保存到: {metadata_file}")
-        
-        return metadata
-
-
-def main():
-    """主函数"""
-    parser = argparse.ArgumentParser(description='本地VLM图片处理工具')
-    
-    # 基本参数
-    parser.add_argument('image_path', nargs='?', help='图片文件路径')
-    parser.add_argument('-c', '--config', default='config.yaml', help='配置文件路径')
-    parser.add_argument('-o', '--output', default='./output', help='输出目录')
-    
-    # 模型和提示词选择
-    parser.add_argument('-m', '--model', help='模型名称')
-    parser.add_argument('-p', '--prompt', help='提示词模板名称')
-    parser.add_argument('--custom-prompt', help='自定义提示词(优先级高于-p参数)')
-    
-    # 处理参数
-    parser.add_argument('-t', '--temperature', type=float, help='生成温度')
-    parser.add_argument('--max-tokens', type=int, help='最大token数')
-    parser.add_argument('--timeout', type=int, help='超时时间(秒)')
-    parser.add_argument('--no-normalize', action='store_true', help='禁用数字标准化, 只有提取表格或ocr相关任务才启用')
-    
-    # 信息查询
-    parser.add_argument('--list-models', action='store_true', help='列出所有可用模型')
-    parser.add_argument('--list-prompts', action='store_true', help='列出所有提示词模板')
-    
-    args = parser.parse_args()
-    
-    try:
-        # 初始化处理器
-        processor = LocalVLMProcessor(args.config)
-        
-        # 处理信息查询请求
-        if args.list_models:
-            processor.list_models()
-            return 0
-            
-        if args.list_prompts:
-            processor.list_prompts()
-            return 0
-        
-        # 检查是否提供了图片路径
-        if not args.image_path:
-            print("❌ 错误: 请提供图片文件路径")
-            print("\n使用示例:")
-            print("  python local_vlm_processor.py image.jpg")
-            print("  python local_vlm_processor.py image.jpg -m qwen2_vl -p photo_analysis")
-            print("  python local_vlm_processor.py image.jpg -p simple_photo_fix  # 图片修复")
-            print("  python local_vlm_processor.py --list-models")
-            print("  python local_vlm_processor.py --list-prompts")
-            return 1
-        
-        # 处理图片
-        result = processor.process_image(
-            image_path=args.image_path,
-            model_name=args.model,
-            prompt_name=args.prompt,
-            output_dir=args.output,
-            temperature=args.temperature,
-            max_tokens=args.max_tokens,
-            timeout=args.timeout,
-            normalize_numbers=not args.no_normalize,
-            custom_prompt=args.custom_prompt
-        )
-        
-        print(f"\n🎉 处理完成!")
-        print(f"📊 处理统计:")
-        
-        if result['processing_info']['result_type'] == 'image':
-            stats = result['processing_info']['text_stats']
-            print(f"   响应长度: {stats['response_length']} 字符")
-            print(f"   图片数据: {'包含' if stats['has_image_data'] else '不包含'}")
-            if stats['has_image_data']:
-                print(f"   Base64长度: {stats['base64_length']} 字符")
-        else:
-            stats = result['processing_info']['text_stats']
-            print(f"   原始长度: {stats['original_length']} 字符")
-            print(f"   最终长度: {stats['final_length']} 字符")
-            if stats['character_changes'] > 0:
-                print(f"   标准化变更: {stats['character_changes']} 字符")
-        
-        return 0
-        
-    except Exception as e:
-        print(f"❌ 程序执行失败: {e}")
-        return 1
-
-
-if __name__ == "__main__":
-    # 如果sys.argv没有被传入参数,则提供默认参数用于测试
-    import sys
-    if len(sys.argv) == 1:
-        sys.argv.extend([
-            '../sample_data/工大照片-1.jpg', 
-			'-p', 'simple_photo_fix',
-            '-o', './output', 
-            '--no-normalize'])
-
-    exit(main())