5 mesiacov pred · 939a8a49d3
--- a/llmops/agents/report_agent.py
+++ b/llmops/agents/report_agent.py
@@ -0,0 +1,208 @@
 
				+"""
			
 
				+报告章节内容生成Agent (Report Section Content Generation Agent)
			
 
				+===============================================
			
 
				+
			
 
				+此Agent负责根据章节标题、写作范围和指标要求，生成报告章节的正文内容。
			
 
				+
			
 
				+核心功能：
			
 
				+1. 分析用户需求：理解报告目标和关键指标
			
 
				+2. 数据结构分析：识别可用字段和数据特征
			
 
				+3. 大纲生成：创建结构化的报告章节和指标需求
			
 
				+4. 智能推断：自动推断所需字段和计算逻辑
			
 
				+
			
 
				+工作流程：
			
 
				+1. 接收用户查询和数据样本
			
 
				+2. 分析数据结构和可用字段
			
 
				+3. 生成报告标题和章节结构
			
 
				+4. 定义全局指标需求
			
 
				+5. 返回结构化的大纲对象
			
 
				+
			
 
				+技术实现：
			
 
				+- 使用LangChain和结构化输出
			
 
				+- 支持异步处理
			
 
				+- 自动字段推断和补全
			
 
				+- 错误处理和默认值提供
			
 
				+
			
 
				+作者: Big Agent Team
			
 
				+版本: 1.0.0
			
 
				+创建时间: 2024-12-20
			
 
				+"""
			
 
				+
			
 
				+from typing import List, Dict, Any
			
 
				+from langchain_openai import ChatOpenAI
			
 
				+from langchain_core.prompts import ChatPromptTemplate
			
 
				+from llmops.config import enable_kp_rc_prompts
			
 
				+
			
 
				+
			
 
				+class ReportSectionGeneratorAgent:
			
 
				+    """报告章节内容生成智能体"""
			
 
				+
			
 
				+    def __init__(self, api_key: str, base_url: str = "https://api.deepseek.com", model_name: str = "deepseek-chat"):
			
 
				+        """
			
 
				+        初始化大纲生成Agent
			
 
				+
			
 
				+        Args:
			
 
				+            api_key: DeepSeek API密钥
			
 
				+            base_url: DeepSeek API基础URL
			
 
				+            model_name: 模型名称
			
 
				+        """
			
 
				+        self.llm = ChatOpenAI(
			
 
				+            model=model_name,
			
 
				+            api_key=api_key,
			
 
				+            base_url=base_url,
			
 
				+            temperature=0.1
			
 
				+        )
			
 
				+
			
 
				+    def get_prompt(self):
			
 
				+        """
			
 
				+        获取报告章节写作提示词模板, 根据配置开关动态获取
			
 
				+        """
			
 
				+        prompt = ""
			
 
				+        if enable_kp_rc_prompts == 1: # 从知识沉淀平台获取
			
 
				+            prompt = self._get_prompt_from_klg()
			
 
				+        if len(prompt) == 0: # 获取默认提示词
			
 
				+            prompt = self._get_base_prompt()
			
 
				+        return prompt
			
 
				+
			
 
				+    def _get_prompt_from_klg(self):
			
 
				+        """
			
 
				+        从知识沉淀平台获取报告章节内容提示词模板（预留）
			
 
				+        可能包括的步骤：
			
 
				+        1、对应接口（通过配置指定） post 请求
			
 
				+        2、获取结果，重新解析组织
			
 
				+        3、异常情况处理
			
 
				+        """
			
 
				+        try:
			
 
				+            pass
			
 
				+        except Exception as e:
			
 
				+            return ""
			
 
				+
			
 
				+    def _get_base_prompt(self):
			
 
				+        """
			
 
				+        获取基础（默认）报告章节写作提示词
			
 
				+        """
			
 
				+        template = """
			
 
				+            ## 基本要求
			
 
				+            你是一位专业的报告撰写专家，需要基于以下参数生成高质量的章节内容。
			
 
				+    
			
 
				+            ## 输入参数
			
 
				+            1. ** 章节标题 **：{title}
			
 
				+            2. ** 写作范围 **：{writing_scope}
			
 
				+            3. ** 指标要求 **：{indicators}
			
 
				+    
			
 
				+            ## 生成要求
			
 
				+            请按照以下结构生成内容：
			
 
				+    
			
 
				+            ### 1. 章节开头（引言部分）
			
 
				+            - 简要说明本章节的核心主题
			
 
				+            - 阐述本章节在整体报告中的定位和作用
			
 
				+            - 概述将要分析的主要内容和逻辑脉络
			
 
				+
			
 
				+            ### 2. 主体内容分析
			
 
				+            ** 基于写作范围和指标要求，具体包含： **
			
 
				+    
			
 
				+            #### a) 数据/现状分析
			
 
				+            - 对相关指标进行系统性分析
			
 
				+            - 使用数据支持观点（如提供具体数据）
			
 
				+
			
 
				+            #### b) 问题/趋势识别
			
 
				+            - 识别当前存在的主要问题
			
 
				+            - 分析发展趋势和潜在机遇
			
 
				+
			
 
				+            #### c) 深度解读
			
 
				+            - 对关键指标进行深入解读
			
 
				+            - 分析指标间的相互关系和影响
			
 
				+
			
 
				+            ### 3. 章节总结
			
 
				+            - 归纳本章核心发现
			
 
				+            - 提出关键结论
			
 
				+            - 引出可能的建议或下一步分析方向
			
 
				+
			
 
				+            ## 写作风格要求
			
 
				+            - 专业、严谨、客观
			
 
				+            - 数据驱动，避免主观臆断
			
 
				+            - 逻辑清晰，层次分明
			
 
				+            - 语言精炼，避免冗余
			
 
				+            - 使用适当的学术 / 行业术语
			
 
				+
			
 
				+            ## 格式要求
			
 
				+            - 使用Markdown格式
			
 
				+            - 适当使用标题层级（  ##、###等）
			
 
				+            - 重要观点可使用 ** 加粗 ** 强调
			
 
				+            - 数据可使用表格或列表清晰呈现
			
 
				+
			
 
				+            请开始生成章节内容：
			
 
				+            """
			
 
				+
			
 
				+        return template
			
 
				+
			
 
				+    async def generate_section_content(self, section: Dict[str, Any]) -> str:
			
 
				+        """异步生成报告章节内容"""
			
 
				+        # 获取报告章节写作提示词
			
 
				+        template = self.get_prompt()
			
 
				+        pt = ChatPromptTemplate.from_template(template)
			
 
				+        chain = pt | self.llm
			
 
				+        response = await chain.ainvoke({
			
 
				+            "title": section["title"],
			
 
				+            "writing_scope": section["description"],
			
 
				+            "indicators": section["metrics"]})
			
 
				+
			
 
				+        # 解析JSON响应
			
 
				+        try:
			
 
				+            # 从响应中提取JSON内容
			
 
				+            section_content = response.content if hasattr(response, 'content') else str(response)
			
 
				+        except Exception as e:
			
 
				+            print(f"生成报告章节内容失败: {e}")
			
 
				+            raise ValueError(f"生成报告章节:{section['title']}异常, {str(e)}")
			
 
				+
			
 
				+        return section_content
			
 
				+
			
 
				+async def generate_report_section_content(api_key: str, base_url: str, model_name: str, section: Dict[str, Any], max_retries: int = 2) -> str:
			
 
				+    """
			
 
				+    生成报告章节内容(可以重试)
			
 
				+
			
 
				+    Args:
			
 
				+        api_key: API密钥
			
 
				+        base_url: LLM base url
			
 
				+        model_name: LLM model name
			
 
				+        section：章节对象
			
 
				+        max_retries: 重试次数
			
 
				+
			
 
				+    Returns:
			
 
				+        生成的章节内容
			
 
				+    """
			
 
				+    import asyncio
			
 
				+    import time
			
 
				+
			
 
				+    agent = ReportSectionGeneratorAgent(api_key=api_key, base_url=base_url, model_name=model_name)
			
 
				+
			
 
				+    print(f"📝 开始生成报告章节:{section['title']} 内容（最多重试 {max_retries} 次）...")
			
 
				+    section_content = ""
			
 
				+    for attempt in range(max_retries):
			
 
				+        try:
			
 
				+            print(f"   尝试 {attempt + 1}/{max_retries}...")
			
 
				+            start_time = time.time()
			
 
				+            # 生成章节内容
			
 
				+            section_content = await agent.generate_section_content(section)
			
 
				+            elapsed_time = time.time() - start_time
			
 
				+            print(f"{elapsed_time:.2f}")
			
 
				+            print(f"\n📝 章节{section['title']}生成成功：")
			
 
				+
			
 
				+            return section_content
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            elapsed_time = time.time() - start_time if 'start_time' in locals() else 0
			
 
				+            print(f"   错误详情: {str(e)}")
			
 
				+
			
 
				+            # 如果不是最后一次尝试，等待后重试
			
 
				+            if attempt < max_retries - 1:
			
 
				+                print(f"   ⏳ {retry_delay} 秒后进行第 {attempt + 2} 次重试...")
			
 
				+                await asyncio.sleep(retry_delay)
			
 
				+                # 增加重试间隔，避免频繁调用
			
 
				+                retry_delay = min(retry_delay * 1.5, 10.0)  # 最多等待10秒
			
 
				+            else:
			
 
				+                print(f"   ❌ 生成报告章节内容{section['title']} 已达到最大重试次数 ({max_retries})")
			
 
				+
			
 
				+    # 所有重试都失败后，使用默认结构
			
 
				+    print("⚠️ 所有重试均失败，使用默认大纲结构")
			
 
				+
			
--- a/llmops/complete_agent_flow_rule.py
+++ b/llmops/complete_agent_flow_rule.py
@@ -30,7 +30,7 @@
 
				 import asyncio
			
 
				 from typing import Dict, Any, List
			
 
				 from datetime import datetime
			
 
				-from langgraph.graph import StateGraph, START, END
			
 
				+from langgraph.graph import StateGraph, END
			
 
				 
			
 
				 from llmops.workflow_state import (
			
 
				     IntegratedWorkflowState,
			
@@ -49,8 +49,9 @@ from llmops.agents.rules_engine_metric_calculation_agent import RulesEngineMetri
 
				 from llmops.agents.data_manager import DataManager
			
 
				 import os
			
 
				 from llmops.agents.data_classify_agent import data_classify
			
 
				-from llmops.config import DEEPSEEK_API_KEY, multimodal_api_url, LLM_API_KEY, LLM_BASE_URL, LLM_MODEL_NAME
			
 
				+from llmops.config import multimodal_api_url, LLM_API_KEY, LLM_BASE_URL, LLM_MODEL_NAME
			
 
				 from llmops.agents.data_stardard import data_standardize
			
 
				+from llmops.agents.report_agent import generate_report_section_content
			
 
				 
			
 
				 class CompleteAgentFlow:
			
 
				     """完整的智能体工作流"""
			
@@ -372,6 +373,10 @@ class CompleteAgentFlow:
 
				                         else:
			
 
				                             section_content["metrics"][metric_id] = "数据缺失"
			
 
				 
			
 
				+                # 生成章节内容
			
 
				+                chapter_content = await generate_report_section_content(api_key=self.api_key, base_url=self.base_url, model_name=self.model_name, section=section_content)
			
 
				+                print(f"生成章节内容：{chapter_content}")
			
 
				+                section_content["content"] = chapter_content
			
 
				                 final_report["sections"].append(section_content)
			
 
				 
			
 
				             # 添加详细的指标信息
			
@@ -679,7 +684,6 @@ class CompleteAgentFlow:
 
				         Args:
			
 
				             question: 用户查询
			
 
				             industry: 行业
			
 
				-            data: 数据集
			
 
				             original_file_path: 原始文件路径
			
 
				             session_id: 会话ID
			
 
				             use_rules_engine_only: 是否只使用规则引擎指标计算
			
@@ -693,7 +697,6 @@ class CompleteAgentFlow:
 
				             print(f"问题：{question}")
			
 
				             print(f"行业：{industry}")
			
 
				             print(f"数据文件：{original_file_path}")
			
 
				-            # print(f"数据条数：{len(data)}")
			
 
				 
			
 
				             if use_rules_engine_only:
			
 
				                 print("计算模式：只使用规则引擎")
			
@@ -791,8 +794,6 @@ async def run_flow(question: str, industry: str, original_file_path: str, api_ke
 
				     return await workflow.run_workflow(question, industry, original_file_path, session_id, use_rules_engine_only, use_traditional_engine_only)
			
 
				 
			
 
				 
			
 
				-
			
 
				-
			
 
				 # 主函数用于测试
			
 
				 async def main():
			
 
				     """主函数：执行系统测试"""
			
--- a/llmops/config.py
+++ b/llmops/config.py
@@ -125,4 +125,9 @@ LLM_MODEL_NAME=deepseek_v3_model["name"]
 
				 
			
 
# Multi-model API endpoint URL
multimodal_api_url = "http://103.154.31.78:20012/api/file/read"
			
 
				-#multimodal_api_url="http://10.192.72.11:6300/api/file/read"
			
 
				+#multimodal_api_url="http://10.192.72.11:6300/api/file/read"
			
 
				+
			
 
				+
			
 
# Whether to fetch the report-section writing prompt from the knowledge platform
# 0: no  1: yes
enable_kp_rc_prompts = 0