Browse Source

预设规则打标

jiayongqiang 2 tuần trước cách đây
mục cha
commit
357e2c12aa

+ 4 - 3
agent/src/agent/agent.py

@@ -53,7 +53,7 @@ class Lables(BaseModel):
     labels: list[Lable] = Field(description="List of optimized labels after reflection.")
     
 
-async def reflect_check(context: str, labels: list):
+async def reflect_check(context: str,is_marine: bool, labels: list):
     agent = create_agent(
         model = llm, 
         response_format=Lables
@@ -78,7 +78,8 @@ async def reflect_check(context: str, labels: list):
     {labels}
 
     四、贷款信息
-    {context}   
+    {context}
+    {"请注意:该笔贷款涉及海洋产业相关内容。" if is_marine else ""}
     
     五、语义判定任务说明(核心)
     请你基于以上信息,从**“贷款打标”的视角**完成语义判定,重点判断以下问题:
@@ -173,7 +174,7 @@ async def generate_reg(tag_name: str, tag_remark: str):
 if __name__ == "__main__":
     context = "该笔贷款用于支持一家制造企业的生产线升级改造,涉及购买新设备、技术改造和相关建设活动。文本中提到的资产包括数台高端数控机床和自动化生产线,项目性质为产能提升和技术升级,合同内容涉及设备采购和安装服务,资金用途明确指向生产线改造,建设活动包括厂房扩建和设备安装调试。"
     labels = [{"label": "设备采购", "desc": "用于采购新设备"}, {"label": "标签2", "desc": "标签2定义"}, {"label": "标签3", "desc": "标签3定义"}]
-    optimized_labels = reflect_check(context, labels)
+    optimized_labels = reflect_check(context, False, labels)
     print(optimized_labels)
  
 

+ 37 - 3
agent/src/agent/api_outter.py

@@ -109,7 +109,7 @@ def fail_tagging(id:str):
 
 def start_tagging(id:str, instucde: Optional[str] = None):
     is_marine = 0
-    if instucde:
+    if instucde is not None:
         rows = dao.query("select tag_type from aitag_org_whitelist where org_code = %s", (instucde,))
         if rows and len(rows) == 1:
             logger.info(f"机构{instucde}在白名单中")
@@ -118,14 +118,47 @@ def start_tagging(id:str, instucde: Optional[str] = None):
             """UPDATE aitag_tag_log SET state = %s, is_marine = %s,org_code = %s, ai_result_starttime = %s WHERE id = %s""",
             (TAGGING_STATE.BEGIN.value, is_marine, instucde, datetime.now(),  id)
         )
+    return is_marine
 
+# 定义预设规则匹配函数
+def defined_rule_match(phrase: str):
+    sql = """select tag_type,tag_nm from ai_tagging.ai_tagging.aitag_predefined_rules where %s ~ defined_rule """
+    rules = dao.query(sql, (phrase,))
+    if rules and len(rules) > 0:
+        matched = rules[0]
+        logger.info(f"Predefined rule matched: {matched}")
+        tag_info = dao.query("""select ati.id,ati.category_id, ati.tag_nm, ati.tag_path,ati.tag_code from aitag_tag_info ati left join aitag_tag_category atc on ati.category_id = atc.id where ati.tag_nm = %s and ati.is_delete = 0 and atc.category_code = %s""", (matched[1], matched[0]))
+        return [{
+            "id": tag_info[0][0],
+            "desc": "",
+            "passr": True,
+            "tag_code": tag_info[0][4],
+            "tag_name": tag_info[0][2],
+            "tag_path": tag_info[0][3],
+            "category_id": tag_info[0][1]
+        }]
+    return None
+
+def end_tagging_predefined_rule(id:str, result:str):
+    dao.execute(
+            """UPDATE aitag_tag_log SET state = %s, result = %s WHERE id = %s""",
+            (TAGGING_STATE.PREDEFINED_RULE_MATCH.value, result,  id)
+        )
 
 async def run_ai_pipeline(log_id: str, tag_category_id: str, phrase: str, instucde: Optional[str] = None):
     try:
         async with background_semaphore:
             logger.info(f"开始打标:{log_id}, {phrase}")
             # step0: 开始打标
-            start_tagging(log_id, instucde)
+            is_marine = start_tagging(log_id, instucde)
+
+            # step0.5: 预设规则匹配,如果匹配成功则直接更新结果并结束打标流程
+            defined_rule_result = defined_rule_match(phrase)    
+            if defined_rule_result:
+                logger.info(f"预设规则匹配成功,直接返回结果: {defined_rule_result}")
+                end_tagging_predefined_rule(log_id, json.dumps(defined_rule_result))
+                return
+
             # step1: 正则过滤
             result = await execute_reg(log_id,tag_category_id,phrase)
             # step2: 向量检索
@@ -133,7 +166,7 @@ async def run_ai_pipeline(log_id: str, tag_category_id: str, phrase: str, instuc
             # step3: LLM 打标
             if result:
                 try:
-                    result = await reflect_check(phrase, result)
+                    result = await reflect_check(phrase,is_marine, result)
                 except Exception as e:
                     logger.error(f"LLM reflection check failed: {e}")
                     result = None
@@ -147,6 +180,7 @@ async def run_ai_pipeline(log_id: str, tag_category_id: str, phrase: str, instuc
         logger.error(f"[{log_id}] Pipeline failed: {e}")
         fail_tagging(log_id)
 
+
 async def batch_run_async():
     # 一次查询1000条 状态为0(处理中)的记录,调用打标流程,直到没有满足条件的记录
     while True:

+ 3 - 2
agent/src/agent/core/tagging_state.py

@@ -1,5 +1,5 @@
 # 0:请求已接收;1:打标完成; 2:客户经理已经确认;3,结果已推送; 
-# 4:开始打标, 5:打标失败
+# 4:开始打标, 5:打标失败; 6:预设规则匹配成功跳过客户经理确认,下一步就可以推送结果了
 from enum import Enum
 
 class TAGGING_STATE(Enum):
@@ -8,4 +8,5 @@ class TAGGING_STATE(Enum):
     FEEDBACK = 2
     PUSHED = 3
     BEGIN = 4
-    FAIL = 5
+    FAIL = 5
+    PREDEFINED_RULE_MATCH = 6

+ 1 - 1
agent/tests/test_tagging.py

@@ -9,7 +9,7 @@ res = requests.post("http://10.192.72.13:9876/api/aitag/v1/tagging", json={
     # "sign": "test_sign",
     "esb_seq_no":"abc",
     "business_attr": "test_attr3",
-    "phrase": "职业:水产养殖人员 投向:内陆养殖 用途:养殖鲍鱼",
+    "phrase": "职业:水产养殖人员 投向:内陆养殖 用途:其他海洋服务",
     "instucde": "9080803001"
 })
 

+ 46 - 2
agent/update.sql

@@ -1,6 +1,9 @@
+/**
+	行社白名单功能
+**/
 -- 添加是否是海洋标签字段
 ALTER TABLE ai_tagging.aitag_tag_log ADD is_marine bit DEFAULT 0;
-COMMENT ON COLUMN ai_tagging.aitag_tag_log.is_marine IS '是否是海洋标签,0:否,1:是';
+COMMENT ON COLUMN ai_tagging.aitag_tag_log.is_marine IS '是否匹配行社白名单,匹配上的默认隶属于海洋经济,在大模型处理阶段给予提示,没匹配上的不给提示,0:否,1:是';
 
 -- 添加法人行社代码字段
 ALTER TABLE ai_tagging.aitag_tag_log ADD org_code varchar(100);
@@ -38,4 +41,45 @@ INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VA
 INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VALUES ('20', '云霄县农村信用合作联社', '908040300', 'marine');
 INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VALUES ('21', '福建漳浦农村商业银行股份有限公司', '908050300', 'marine');
 INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VALUES ('22', '诏安县农村信用合作联社', '908060300', 'marine');
-INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VALUES ('23', '东山县农村信用合作联社', '908080300', 'marine');
+INSERT INTO ai_tagging.aitag_org_whitelist (id, org_name, org_code, tag_type) VALUES ('23', '东山县农村信用合作联社', '908080300', 'marine');
+
+/**
+ Predefined-rule matches need no manual confirmation; they can be pushed straight to the profiling system.
+**/
+
+-- Qualified as schema.table, like the other statements visible in this script;
+-- a three-part database.schema.table name only resolves when the connected
+-- database happens to have that exact name.
+update ai_tagging.aitag_tag_category set category_code = 'marine' where id = 'f47ac10b-58cc-4372-a567-0e02b2c3d479' ;
+
+-- Predefined rules: each row maps a preset keyword to a tag name within one tag system.
+CREATE TABLE ai_tagging.aitag_predefined_rules
+(
+    id           VARCHAR(100) NOT NULL,  -- rule identifier (primary key)
+    defined_rule VARCHAR(100),           -- preset keyword
+    tag_type     VARCHAR(100),           -- tag system; corresponds to aitag_tag_category.category_code
+    tag_nm       VARCHAR(100),           -- name of the tag the keyword maps to
+    CONSTRAINT aitag_predefined_rules_PK PRIMARY KEY (id)
+);
+
+-- Column descriptions stored in the database catalog.
+COMMENT ON COLUMN ai_tagging.aitag_predefined_rules.defined_rule IS '预设关键字';
+COMMENT ON COLUMN ai_tagging.aitag_predefined_rules.tag_type IS '规则所属标签体系,对应aitag_tag_category的category_code字段';
+COMMENT ON COLUMN ai_tagging.aitag_predefined_rules.tag_nm IS '预设关键字对应的标签名称,默认在同一个体系下标签名称不重复,用ID或者代码不直观,所以用名称';
+
+-- Seed data for the predefined rules. Each row is (id, preset keyword, tag system code, tag name);
+-- every row below belongs to the 'marine' tag system.
+INSERT INTO ai_tagging.aitag_predefined_rules (id, defined_rule, tag_type, tag_nm) VALUES
+('rule_01', '海水养殖', 'marine', '海水养殖'),
+('rule_02', '海水捕捞', 'marine', '海洋捕捞'),
+('rule_03', '海洋石油开采', 'marine', '海洋石油和天然气开采'),
+('rule_04', '海洋天然气及可燃冰开采', 'marine', '海洋石油和天然气开采'),
+('rule_05', '海洋工程装备制造', 'marine', '海洋工程装备制造业'),
+('rule_06', '深海石油钻探设备制造', 'marine', '海洋油气资源勘探开发装备制造及修理'),
+('rule_07', '海洋油气资源开发利用工程建筑', 'marine', '海上工程建筑'),
+('rule_08', '海洋能源开发利用工程建筑', 'marine', '海上工程建筑'),
+('rule_09', '海底隧道工程建筑', 'marine', '海底工程建筑'),
+('rule_10', '海底设施铺设工程建筑', 'marine', '海底工程建筑'),
+('rule_11', '其他海洋工程建筑', 'marine', '海上工程建筑'),
+('rule_12', '港口及航运设施工程建筑', 'marine', '近岸工程建筑'),
+('rule_13', '海水淡化处理', 'marine', '海水淡化'),
+('rule_14', '海上旅客运输', 'marine', '海洋交通运输业'),
+('rule_15', '客运轮渡运输', 'marine', '海洋交通运输业'),
+('rule_16', '远洋货物运输', 'marine', '海洋交通运输业'),
+('rule_17', '沿海货物运输', 'marine', '海洋交通运输业'),
+('rule_18', '海底管道运输', 'marine', '海洋交通运输业'),
+('rule_19', '海洋气象服务', 'marine', '海洋自然科学研究和试验发展'),
+('rule_20', '海洋环境服务', 'marine', '海洋信息集成服务'),
+('rule_21', '其他海洋服务', 'marine', '海洋技术服务');