|
|
@@ -15,13 +15,15 @@ from typing import Optional
|
|
|
import json
|
|
|
from agent.logger import logger
|
|
|
from agent.core.config import get_config_path
|
|
|
+import asyncio
|
|
|
+
|
|
|
config = get_config_path()
|
|
|
TOP_K = config['app']['top_k']
|
|
|
+CONCURRENCE = int(config['app']['concurrence'])
|
|
|
+background_semaphore = asyncio.Semaphore(CONCURRENCE)
|
|
|
|
|
|
router = APIRouter(prefix="/v1", tags=["AI Tagging"])
|
|
|
|
|
|
-
|
|
|
-
|
|
|
class TaggingRequest(BaseModel):
|
|
|
app_id: Optional[str] = Field(None, description="应用ID")
|
|
|
timestamp: Optional[int] = Field(None, description="请求时间戳")
|
|
|
@@ -61,15 +63,18 @@ async def execute_reg(log_id:str,tag_category_id:str,phrase: str)-> list:
|
|
|
"""UPDATE aitag_tag_log SET reg_result = %s WHERE id = %s""",
|
|
|
(str(result), log_id)
|
|
|
)
|
|
|
- logger.info(f"Updated reg_result for log_id {id}")
|
|
|
+ logger.info(f"[{log_id}] Regex filtering result: {result}")
|
|
|
return result
|
|
|
|
|
|
def vector_similarity_search(phrase: str, ids:list)-> list:
|
|
|
+ logger.info("Starting vector similarity search...")
|
|
|
# 这里应该调用向量数据库进行相似度检索,返回相关标签id列表
|
|
|
query = get_embeddings([phrase])[0]
|
|
|
results = hybrid_search(ids, query, top_k=TOP_K)
|
|
|
# return [{"id": r["_id"], "score": r["_score"], "tag_prompt": r["_source"]["tag_prompt"],"tag_name": r["_source"]["tag_name"],"tag_code": r["_source"]["tag_code"]} for r in results]
|
|
|
- return [{"id": r["_id"], "tag_remark":r["_source"]["tag_remark"], "tag_prompt": r["_source"]["tag_prompt"],"tag_name": r["_source"]["tag_name"],"tag_code": r["_source"]["tag_code"],"tag_path": r["_source"]["tag_path"],"category_id": r["_source"]["category_id"] } for r in results]
|
|
|
+ r = [{"id": r["_id"], "tag_remark":r["_source"]["tag_remark"], "tag_prompt": r["_source"]["tag_prompt"],"tag_name": r["_source"]["tag_name"],"tag_code": r["_source"]["tag_code"],"tag_path": r["_source"]["tag_path"],"category_id": r["_source"]["category_id"] } for r in results]
|
|
|
+ logger.info(f"{phrase} Vector search result: {r}")
|
|
|
+ return r
|
|
|
|
|
|
def init_tag_log(request: TaggingRequest):
|
|
|
id = uuid.uuid4().hex
|
|
|
@@ -88,34 +93,32 @@ def init_tag_log(request: TaggingRequest):
|
|
|
|
|
|
def update_tag_log(id:str, result:str):
|
|
|
dao.execute(
|
|
|
- """UPDATE aitag_tag_log SET state = %s, result = %s WHERE id = %s""",
|
|
|
- (1, result, id)
|
|
|
+ """UPDATE aitag_tag_log SET state = %s, result = %s, ai_result_endtime = %s WHERE id = %s""",
|
|
|
+ (1, result, datetime.now(), id)
|
|
|
)
|
|
|
|
|
|
async def run_ai_pipeline(log_id: str, tag_category_id: str, phrase: str):
|
|
|
try:
|
|
|
- # step1: 正则过滤
|
|
|
- result = await execute_reg(log_id,tag_category_id,phrase)
|
|
|
- logger.info(f"[{log_id}] Regex filtering result: {result}")
|
|
|
-
|
|
|
- # step2: 向量检索
|
|
|
- if result:
|
|
|
- result = vector_similarity_search(phrase, result)
|
|
|
- logger.info(f"[{log_id}] Vector search result: {result}")
|
|
|
-
|
|
|
- # step3: LLM 打标
|
|
|
- if result:
|
|
|
- result = await reflect_check(phrase, result)
|
|
|
- result = [r.dict() for r in result.labels]
|
|
|
- result = json.dumps(result, ensure_ascii=False)
|
|
|
- logger.info(f"[{log_id}] LLM result: {result}")
|
|
|
-
|
|
|
- # step4: 更新数据库
|
|
|
- # 如果result是个空集合,插入None
|
|
|
- update_tag_log(log_id, result if result else None)
|
|
|
+ async with background_semaphore:
|
|
|
+ # step1: 正则过滤
|
|
|
+ result = await execute_reg(log_id,tag_category_id,phrase)
|
|
|
+ # step2: 向量检索
|
|
|
+ if result:
|
|
|
+ result = vector_similarity_search(phrase, result)
|
|
|
+ # step3: LLM 打标
|
|
|
+ if result:
|
|
|
+ try:
|
|
|
+ result = await reflect_check(phrase, result)
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"LLM reflection check failed: {e}")
|
|
|
+ result = None
|
|
|
+ # step4: 更新数据库
|
|
|
+ # 如果result是个空集合,插入None
|
|
|
+ update_tag_log(log_id, result if result else None)
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f"[{log_id}] Pipeline failed: {e}")
|
|
|
+ update_tag_log(log_id, None)
|
|
|
|
|
|
@router.post("/tagging")
|
|
|
async def ai_tagging(request: TaggingRequest,background_tasks: BackgroundTasks):
|