1
0

2 Commits 7ad1aad432 ... fb02e06147

Autor SHA1 Mensagem Data
  2643616413 fb02e06147 Merge branch 'master' of https://git.yangzhiqiang.tech/zsh/finrep-report há 8 horas atrás
  2643616413 71b5dadbaf python端接口开发 há 8 horas atrás

+ 10 - 6
algo/docs/API.md

@@ -329,14 +329,18 @@ RAG 索引使用进程内 **`InMemoryRagStore`**(按 `task_id` 隔离)。**
 | `task_id` | string | 否 | RAG 召回、`task_id` 隔离用 |
 | `tenant_id` | string | 否 | 租户 ID |
 | `report_type` | string | 否 | 报告类型 |
-| `paragraph_logic` | string | 否 | 撰写逻辑 |
-| `paragraph_position` | string | 否 | 段落定位 |
+| `paragraph_logic` | string | 否 | 兼容字段,不建议单独传;以 `template.paragraph_logic` 为准 |
+| `paragraph_position` | string | 否 | 兼容字段,不建议单独传;以 `template.paragraph_position` 为准 |
 | `overall_logic` | string | 否 | 全篇逻辑 |
 | `chapter_logic` | string | 否 | 章逻辑 |
 | `task_input` | object | 否 | 任务级输入 |
-| `data_package` | object | 否 | 数据包(召回结果会合并进来) |
-| `example` | string | 否 | 示例 |
-| `notes` | string | 否 | 备注 |
+| `template` | object | 否 | 知识单元模板(红色字段来源):可含 `paragraph_position`、`paragraph_logic`、`example`、`notes`;服务端始终以此处的值作为这四个字段的最终取值,并覆盖同名顶层字段(模板中缺失的键按空字符串处理) |
+| `auto_data_package` | object | 否 | 自动获取的数据结果(原 `data_package_1`,可兼容旧字段名) |
+| `manual_supplement` | object | 否 | 人工补录结构化数据 |
+| `manual_supplement_text` | string | 否 | 人工补录文本 |
+| `data_package` | object | 否 | 最终数据包增量;服务端以 `auto_data_package` 为基底,写入非空的 `manual_supplement_text`,再深度合并本字段(同名键以本字段为准)后使用;当前合并实现未并入 `manual_supplement`;上游召回结果可直接放入本字段 |
+| `example` | string | 否 | 兼容字段,不建议单独传;以 `template.example` 为准 |
+| `notes` | string | 否 | 兼容字段,不建议单独传;以 `template.notes` 为准 |
 | `rag_recall` | boolean | 否 | 默认 `false`;兼容字段,本接口已移除内置召回,传 `true` 也不会触发 `retrieve` |
 | `rag_query` | string | 否 | 召回查询;空则拼接 `paragraph_position`、`paragraph_logic`、`knowledge_unit_id` |
 | `rag_top_k` | integer | 否 | 传给 RAG;空则使用服务默认 `rag_default_top_k` |
@@ -349,7 +353,7 @@ RAG 索引使用进程内 **`InMemoryRagStore`**(按 `task_id` 隔离)。**
 | `generated_text` | string | 生成正文 |
 | `usage` | object | `TokenUsage`:`prompt_tokens`、`completion_tokens`(当前实现多为 0) |
 
-**`rag_recall=true`** 时:先 `retrieve`,将结果写入 `data_package["rag_recall"]`(含 `query`、`hits`、`formatted_context`),再渲染模板并调用 LLM
+RAG 召回建议在上游(Java/数据准备阶段)先完成,并将结果写入 `data_package` 作为正文素材;本接口不再执行内置召回与合并逻辑,即使传入 `rag_recall=true` 也不会调用 `retrieve`。
 
 **`FINREP_STUB_SKILLS=true`**:不请求 LLM,返回占位正文。
 

BIN
algo/src/finrep_algo_agent/schemas/__pycache__/section.cpython-312.pyc


+ 18 - 1
algo/src/finrep_algo_agent/schemas/section.py

@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import AliasChoices, BaseModel, Field
 
 
 class SectionRequest(BaseModel):
@@ -16,6 +16,23 @@ class SectionRequest(BaseModel):
     overall_logic: str = ""
     chapter_logic: str = ""
     task_input: dict[str, Any] = Field(default_factory=dict)
+    template: dict[str, Any] = Field(
+        default_factory=dict,
+        description="知识单元模板字段(红色):如 paragraph_position/paragraph_logic/example/notes",
+    )
+    auto_data_package: dict[str, Any] = Field(
+        default_factory=dict,
+        validation_alias=AliasChoices("auto_data_package", "data_package_1"),
+        description="自动获取的数据结果(兼容旧字段名 data_package_1)",
+    )
+    manual_supplement: dict[str, Any] = Field(
+        default_factory=dict,
+        description="人工补录结构化数据(可选)",
+    )
+    manual_supplement_text: str = Field(
+        default="",
+        description="人工补录文本(可选)",
+    )
     data_package: dict[str, Any] = Field(default_factory=dict)
     example: str = ""
     notes: str = ""

BIN
algo/src/finrep_algo_agent/skills/section_gen/__pycache__/section_gen.cpython-312.pyc


+ 33 - 34
algo/src/finrep_algo_agent/skills/section_gen/section_gen.py

@@ -13,6 +13,37 @@ logger = logging.getLogger(__name__)
 _ALLOWED_TEMPLATE_TYPES = frozenset({"info", "analysis", "metric", "judgment"})
 
 
+def _merge_dict(target: dict, patch: dict) -> dict:
+    for k, v in patch.items():
+        if isinstance(v, dict) and isinstance(target.get(k), dict):
+            target[k] = _merge_dict(dict(target[k]), v)
+        else:
+            target[k] = v
+    return target
+
+
+def _build_effective_request(req: SectionRequest) -> SectionRequest:
+    # 统一数据口径:最终 data_package 以自动结果与人工补录文本为主,
+    # 若上游已完成 RAG/整理并放入 data_package,则在此并入作为最终素材。
+    final_data = _merge_dict({}, dict(req.auto_data_package))
+    if req.manual_supplement_text.strip():
+        final_data["manual_supplement_text"] = req.manual_supplement_text.strip()
+    if req.data_package:
+        final_data = _merge_dict(final_data, dict(req.data_package))
+
+    # 红色字段来自顶层 template,不再单独由请求顶层 paragraph_* / example / notes 传入
+    template = req.template if isinstance(req.template, dict) else {}
+    return req.model_copy(
+        update={
+            "paragraph_position": str(template.get("paragraph_position", "")).strip(),
+            "paragraph_logic": str(template.get("paragraph_logic", "")).strip(),
+            "example": str(template.get("example", "")).strip(),
+            "notes": str(template.get("notes", "")).strip(),
+            "data_package": final_data,
+        }
+    )
+
+
 def _stub_section(req: SectionRequest) -> SectionResponse:
     text = (
         f"【占位正文】knowledge_unit_id={req.knowledge_unit_id} "
@@ -29,41 +60,9 @@ async def run_section(
     llm: LlmClient,
     rag: RagService | None = None,
 ) -> SectionResponse:
+    effective = _build_effective_request(req)
     if settings.stub_skills:
-        return _stub_section(req)
-
-    effective = req
-    if req.rag_recall:
-        if not req.task_id:
-            raise ValueError("rag_recall=true 时必须提供 task_id")
-        if rag is None:
-            raise ValueError("RagService 未注入,无法执行召回")
-        if not (settings.embedding_api_key or settings.llm_api_key):
-            raise ValueError("RAG 召回需配置 FINREP_EMBEDDING_API_KEY 或 FINREP_LLM_API_KEY")
-        q = (req.rag_query or "").strip()
-        if not q:
-            q = "\n".join(
-                s
-                for s in (
-                    (req.paragraph_position or "").strip(),
-                    (req.paragraph_logic or "").strip(),
-                    f"knowledge_unit_id:{req.knowledge_unit_id}",
-                )
-                if s
-            )
-        recall = await rag.retrieve(
-            req.task_id,
-            q,
-            top_k=req.rag_top_k,
-            min_score=req.rag_min_score,
-        )
-        dp = dict(req.data_package)
-        dp["rag_recall"] = {
-            "query": q,
-            "hits": [h.model_dump() for h in recall.hits],
-            "formatted_context": recall.formatted_context,
-        }
-        effective = req.model_copy(update={"data_package": dp})
+        return _stub_section(effective)
 
     tt = (effective.template_type or "info").lower()
     if tt not in _ALLOWED_TEMPLATE_TYPES:

BIN
algo/tests/__pycache__/test_prompts.cpython-312-pytest-9.0.2.pyc


BIN
algo/tests/__pycache__/test_section_rag.cpython-312-pytest-9.0.2.pyc


+ 11 - 0
algo/tests/test_prompts.py

@@ -63,6 +63,17 @@ def test_section_template_renders() -> None:
     assert "撰写" in text
 
 
+def test_section_request_accepts_legacy_data_package_1() -> None:
+    req = SectionRequest.model_validate(
+        {
+            "knowledge_unit_id": "ku-x",
+            "template_type": "info",
+            "data_package_1": {"auto": {"k": "v"}},
+        }
+    )
+    assert req.auto_data_package == {"auto": {"k": "v"}}
+
+
 def test_builtin_l1_embedded_in_template() -> None:
     req = OutlineL1Request(report_type="项目融资", chapter_candidates=[])
     text = build_outline_l1_user_prompt(req)

+ 27 - 5
algo/tests/test_section_rag.py

@@ -11,7 +11,7 @@ from finrep_algo_agent.skills.section_gen.section_gen import run_section
 
 
 @pytest.mark.asyncio
-async def test_section_rag_recall_merges_into_data_package() -> None:
+async def test_section_uses_upstream_data_package_without_internal_rag_recall() -> None:
     settings = Settings(stub_skills=False, llm_api_key="test-key")
     mock_llm = AsyncMock()
     mock_llm.chat_completion = AsyncMock(return_value=" 生成正文片段 ")
@@ -37,13 +37,35 @@ async def test_section_rag_recall_merges_into_data_package() -> None:
         task_id="task-99",
         rag_recall=True,
         rag_query="融资主体",
-        paragraph_position="定位",
-        paragraph_logic="撰写逻辑",
+        template={"paragraph_position": "定位", "paragraph_logic": "撰写逻辑"},
         data_package={"api": {"x": 1}},
     )
     resp = await run_section(req, settings=settings, llm=mock_llm, rag=mock_rag)
     assert "生成正文片段" in resp.generated_text
-    mock_rag.retrieve.assert_awaited_once()
+    mock_rag.retrieve.assert_not_awaited()
     call_kw = mock_llm.chat_completion.await_args
     prompt = call_kw[0][0][0]["content"]
-    assert "rag_recall" in prompt or "[RAG]" in prompt
+    assert "定位" in prompt
+
+
+@pytest.mark.asyncio
+async def test_section_uses_template_fields_from_top_level_template() -> None:
+    settings = Settings(stub_skills=False, llm_api_key="test-key")
+    mock_llm = AsyncMock()
+    mock_llm.chat_completion = AsyncMock(return_value="模板注入正文")
+    req = SectionRequest(
+        knowledge_unit_id="ku-2",
+        template_type="info",
+        template={
+            "paragraph_position": "来自知识单元模板-段落定位",
+            "paragraph_logic": "来自知识单元模板-段落逻辑",
+            "example": "示例文本",
+            "notes": "模板注意事项",
+        },
+    )
+    resp = await run_section(req, settings=settings, llm=mock_llm, rag=AsyncMock())
+    assert "模板注入正文" in resp.generated_text
+    prompt = mock_llm.chat_completion.await_args[0][0][0]["content"]
+    assert "来自知识单元模板-段落定位" in prompt
+    assert "来自知识单元模板-段落逻辑" in prompt
+    assert "模板注意事项" in prompt