10 bulan lalu · 4f1271f856
--- a/agno_test1.py
+++ b/agno_test1.py
@@ -0,0 +1,102 @@
 
import os
from typing import Optional

from agno.agent import Agent
from agno.models.base import Model
from agno.tools import tool
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel, Field

														
 
															+

														
 
															+load_dotenv()

														
 
															+

														
 
															+# 定义转账请求模型

														
 
															+class TransferRequest(BaseModel):

														
 
															+    payee_name: str = Field(..., description="收款人全名")

														
 
															+    account_number: str = Field(..., min_length=12, max_length=12, description="12位银行账号")

														
 
															+    amount: float = Field(..., gt=0, le=500, description="转账金额（1-500元）")

														
 
															+    phone: str = Field(None, min_length=11, max_length=11, description="收款人手机号（可选）")

														
 
															+

														
 
															+# 注册转账工具

														
 
															+@tool

														
 
															+def bank_transfer(request: TransferRequest):

														
 
															+    """执行银行转账操作，包含以下校验规则：

														
 
															+    1. 账号必须是12位数字

														
 
															+    2. 金额必须介于1-500元

														
 
															+    3. 手机号可选但必须是11位

														
 
															+    """

														
 
															+    # 模拟失败场景

														
 
															+    if request.amount > 500:

														
 
															+        return {"status": "failed", "reason": "单笔转账金额不能超过500元"}

														
 
															+    if len(request.account_number) != 12 or not request.account_number.isdigit():

														
 
															+        return {"status": "failed", "reason": "账号格式错误（需要12位数字）"}

														
 
															+    

														
 
															+    # 模拟成功响应

														
 
															+    return {

														
 
															+        "status": "success",

														
 
															+        "transaction_id": f"TX{os.urandom(4).hex().upper()}",

														
 
															+        "amount": request.amount

														
 
															+    }

														
 
															+

														
 
															+# 修复后的Qwen模型实现

														
 
															+class QwenModel(Model):

														
 
															+    def __init__(self):

														
 
															+        self.client = OpenAI(

														
 
															+            base_url=os.getenv("BAILIAN_API_BASE_URL"),

														
 
															+            api_key=os.getenv("BAILIAN_API_KEY")

														
 
															+        )

														
 
															+        

														
 
															+    def invoke(self, messages, tools=None, **kwargs):

														
 
															+        response = self.client.chat.completions.create(

														
 
															+            model="qwen3-32b",

														
 
															+            messages=messages,

														
 
															+            tools=tools,

														
 
															+            **kwargs

														
 
															+        )

														
 
															+        return self.parse_provider_response(response)

														
 
															+    

														
 
															+    def parse_provider_response(self, response):

														
 
															+        return {

														
 
															+            "content": response.choices[0].message.content,

														
 
															+            "tool_calls": response.choices[0].message.tool_calls

														
 
															+        }

														
 
															+    

														
 
															+    # 未实现的方法抛出明确错误

														
 
															+    def invoke_stream(self, messages, tools=None, **kwargs):

														
 
															+        raise NotImplementedError("流式调用暂不支持")

														
 
															+    

														
 
															+    def ainvoke(self, messages, tools=None, **kwargs):

														
 
															+        raise NotImplementedError("异步调用暂不支持")

														
 
															+    

														
 
															+    def ainvoke_stream(self, messages, tools=None, **kwargs):

														
 
															+        raise NotImplementedError("异步流调用暂不支持")

														
 
															+    

														
 
															+    def parse_provider_response_delta(self, delta):

														
 
															+        return {"content": delta.get("content", "")}

														
 
															+

														
 
															+# 配置Agent

														
 
															+agent = Agent(

														
 
															+    model=QwenModel(),

														
 
															+    tools=[bank_transfer],

														
 
															+    instructions="""您是银行AI助手，需要：  # 修改参数名

														
 
															+1. 按顺序收集：姓名→账号→金额→手机号（可选）

														
 
															+2. 金额超过500时主动提醒限额规则

														
 
															+3. 自动校验输入格式

														
 
															+4. 转账失败时解释具体原因"""

														
 
															+)

														
 
															+

														
 
															+if __name__ == "__main__":

														
 
															+    # 启动对话循环

														
 
															+    while True:

														
 
															+        user_input = input("User: ")

														
 
															+        response = agent.run(user_input)

														
 
															+        

														
 
															+        # 处理转账结果

														
 
															+        if response.tool_responses:

														
 
															+            for resp in response.tool_responses:

														
 
															+                if resp.tool_name == "bank_transfer":

														
 
															+                    if resp.result["status"] == "success":

														
 
															+                        print(f"\n转账成功！交易号：{resp.result['transaction_id']}")

														
 
															+                        exit()

														
 
															+                    else:

														
 
															+                        print(f"\n错误：{resp.result['reason']}，请修改信息后重试")

														
 
															+        

														
 
															+        print(f"AI: {response.content}")
														
--- a/error_analysis.csv
+++ b/error_analysis.csv
@@ -0,0 +1,5 @@
 
															+text,true_label,prediction,reason

														
 
															+"How many movies are there that you can think of when you see a movie like this? I can't count them but it sure seemed like the movie makers were trying to give me a hint. I was reminded so often of other movies, it became a big distraction. One of the borrowed memorable lines came from a movie from 2003 - Day After Tomorrow. One line by itself, is not so bad but this movie borrows so much from so many movies it becomes a bad risk.<br /><br />BUT...<br /><br />See The Movie! Despite its downfalls there is enough to make it interesting and maybe make it appear clever. While borrowing so much from other movies it never goes overboard. In fact, you'll probably find yourself battening down the hatches and riding the storm out. Why? ...Costner and Kutcher played their characters very well. I have never been a fan of Kutcher's and I nearly gave up on him in The Guardian, but he surfaced in good fashion. Costner carries the movie swimmingly with the best of Costner's ability. I don't think Mrs. Robinson had anything to do with his success.<br /><br />The supporting cast all around played their parts well. I had no problem with any of them in the end. But some of these characters were used too much.<br /><br />From here on out I can only nit-pick so I will save you the wear and tear. Enjoy the movie, the parts that work, work well enough to keep your head above water. Just don't expect a smooth ride.<br /><br />7 of 10 but almost a 6.",positive,negative,"The movie borrows so much from other movies, leading to a poor risk and a lack of originality. It becomes a distraction, with characters used too much and the plot not well developed. The film ends up being a negative experience despite its points of interest."

														
 
															+"I only went to see this movie because I have always liked Kevin Costner. I felt that Ashton did a great job in the Butterfly Effect. Unfortunately, even though these two actors were/are capable of good if not great acting moments some of that was missing here. Some of the scenes were just not believable and didn't have enough story line support.<br /><br />Though the movie claims influence from the hurricane Katrina aftermath, there was very little (none) to that effect in the movie.<br /><br />Overall, I liked the fact that the movie brought forward some of what goes into saving lives from a water perspective.<br /><br />The special effects were pretty good and more than a little intimidating. Not sure I'll ever go deep sea fishing again...<br /><br />I expected a little more emotion in the film than what was presented.<br /><br />Definitely a movie that could've been seen on DVD.",positive,negative,"The movie missed some acting moments, scenes were not believable, and there was little to that effect in the movie. The special effects were good but didn't fully capture the emotional impact expected from a hurricane Katrina reference."

														
 
															+"I'm a Petty Officer 1st Class (E-6) and have been in the USCG for 6 years and feel that this movie strongly represents the Coast Guard. There were only a few scenes that were far fetched. The most far-fetched was when PO Fischer (Kutcher) went down inside of the sinking vessel to pull the vessel's captain out of the engine room... that would never happen. Swimmers are not allowed to go inside of any vessel no matter the circumstances. Second, the Command Center (supposedly in Kodiak), it looked more like a NASA command center... we don't have any gear that hi-tech. Third, the Captain of the Airstation would not be running the search & rescue cases with like 10 people on watch. In reality it would be an E-6 or E-7 as the SAR Controller and maybe 2 other support personnel like an assist SAR Controller & a Radio Watchstander. Otherwise the movie was dead on, I think they should have incorporated more of the other rates in the CG and their roles in search & rescue instead of just Aviation based rates. Some of the scenes from ""A"" school reminded me of my days their and the dumb stuff I did and got in trouble for in my younger days.",positive,negative,"The movie portrayed the Coast Guard in a way that was unrealistic, with scenes that failed to meet expectations, and lacked practicality for real Coast Guard operations. The characters and situations were not aligned with realistic military scenarios."

														
 
															+"Wow, another Kevin Costner hero movie. Postman, Tin Cup, Waterworld, Bodyguard, Wyatt Earp, Robin Hood, even that baseball movie. Seems like he makes movies specifically to be the center of attention. The characters are almost always the same ... the heroics, the flaws, the greatness, the fall, the redemption. Yup, within the 1st 5 minutes of the movie, we're all supposed to be in awe of his character, and it builds up more and more from there.<br /><br />And this time the story ... story? ... is just a collage of different movies. You don't need a spoiler; you've seen this movie several times, though it had different titles. You'll know what will happen way before it happens. This is like mixing An Officer and a Gentleman with Backdraft, but both are easily better movies. Watch Backdraft to see how this kind of movie should be made ... and also to see how an good but slightly underrated actor, Russell, plays the hero.",negative,positive,"The review highlights Kevin Costner's consistent performance across multiple movies, emphasizing his character depth and the storytelling quality. The reviewer appreciates how the narrative builds up to a compelling conclusion and mentions that the movie is a mix of different genres with strong performances."

														
--- a/error_analysis_report.csv
+++ b/error_analysis_report.csv
--- a/prediction_results.csv
+++ b/prediction_results.csv
--- a/test1.ipynb
+++ b/test1.ipynb
@@ -0,0 +1,724 @@
 
															+{
														
 
															+ "cells": [
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 1,
														
 
															+   "id": "691a012b",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "from openai import OpenAI\n",
														
 
															+    "from dotenv import load_dotenv \n",
														
 
															+    "import glob\n",
														
 
															+    "import json\n",
														
 
															+    "import os\n",
														
 
															+    "import time\n",
														
 
															+    "from pydantic import BaseModel\n",
														
 
															+    "from openai import BadRequestError\n",
														
 
															+    "import pandas as pd\n",
														
 
															+    "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
														
 
															+    "from tqdm import tqdm\n",
														
 
															+    "\n",
														
 
															+    "# 加载环境变量\n",
														
 
															+    "load_dotenv() \n",
														
 
															+    "\n",
														
 
															+    "# 创建OpenAI客户端\n",
														
 
															+    "client = OpenAI(base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
														
 
															+    "       api_key=os.getenv(\"BAILIAN_API_KEY\"))\n",
														
 
															+    "DATA_ROOT = \"D:/ai_learning-master/ai_learning/data/acllmdb_sentiment_small\"\n",
														
 
															+    "SAVE_PATH = \"./prediction_results.csv\"\n",
														
 
															+    "\n",
														
 
															+    "class SentimentRequest(BaseModel):\n",
														
 
															+    "    sentiment: str\n",
														
 
															+    "\n",
														
 
															+    "\n"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": null,
														
 
															+   "id": "fe312896",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "def load_dataset():\n",
														
 
															+    "    \"\"\"加载数据集并返回带标签的样本列表\"\"\"\n",
														
 
															+    "    samples = []\n",
														
 
															+    "    for label in [\"positive\", \"negative\"]:\n",
														
 
															+    "        dir_path = os.path.join(DATA_ROOT, label)\n",
														
 
															+    "        for file_path in glob.glob(os.path.join(dir_path, \"*.txt\")):\n",
														
 
															+    "            with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
														
 
															+    "                samples.append({\n",
														
 
															+    "                    \"text\": f.read(),\n",
														
 
															+    "                    \"true_label\": label\n",
														
 
															+    "                })\n",
														
 
															+    "    return samples"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 5,
														
 
															+   "id": "0cfb9303",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "def predict_text(client, text, max_retries=3):\n",
														
 
															+    "    \"\"\"纯文本模式预测\"\"\"\n",
														
 
															+    "    prompt = f\"\"\"请判断以下影评的情感倾向，严格只输出英文单词positive或negative：\n",
														
 
															+    "{text}\"\"\"\n",
														
 
															+    "    \n",
														
 
															+    "    for _ in range(max_retries):\n",
														
 
															+    "        try:\n",
														
 
															+    "            response = client.chat.completions.create(\n",
														
 
															+    "                model=\"qwen3-4b\",\n",
														
 
															+    "                messages=[{\"role\": \"user\", \"content\": prompt}],\n",
														
 
															+    "                temperature=0,\n",
														
 
															+    "                extra_body={\"enable_thinking\": False}\n",
														
 
															+    "            )\n",
														
 
															+    "         \n",
														
 
															+    "            return response.choices[0].message.content.strip().lower()\n",
														
 
															+    "        except BadRequestError:\n",
														
 
															+    "            time.sleep(1)\n",
														
 
															+    "    return \"error\"\n",
														
 
															+    "\n",
														
 
															+    "def predict_json(client, text, max_retries=3):\n",
														
 
															+    "    \"\"\"JSON模式预测\"\"\"\n",
														
 
															+    "    prompt = f\"\"\"分析以下影评的情感倾向，使用JSON格式返回结果：\n",
														
 
															+    "{text}\"\"\"\n",
														
 
															+    "    \n",
														
 
															+    "    for _ in range(max_retries):\n",
														
 
															+    "        try:\n",
														
 
															+    "            response = client.chat.completions.create(\n",
														
 
															+    "                model=\"qwen3-4b\",\n",
														
 
															+    "                messages=[{\"role\": \"user\", \"content\": prompt}],\n",
														
 
															+    "                response_format={\"type\": \"json_object\"},\n",
														
 
															+    "                temperature=0,\n",
														
 
															+    "                extra_body={\"enable_thinking\": False}\n",
														
 
															+    "            )\n",
														
 
															+    "            result = json.loads(response.choices[0].message.content)\n",
														
 
															+    "            return SentimentRequest(**result).sentiment\n",
														
 
															+    "        except:\n",
														
 
															+    "            time.sleep(1)\n",
														
 
															+    "    return \"error\"\n",
														
 
															+    "\n",
														
 
															+    "def predict_tool(client, text, max_retries=3):\n",
														
 
															+    "    \"\"\"工具调用模式预测\"\"\"\n",
														
 
															+    "    prompt = f\"\"\"请分析以下影评的情感倾向：\n",
														
 
															+    "{text}\"\"\"\n",
														
 
															+    "    \n",
														
 
															+    "    tools = [{\n",
														
 
															+    "        \"type\": \"function\",\n",
														
 
															+    "        \"function\": {\n",
														
 
															+    "            \"name\": \"sentiment_analysis\",\n",
														
 
															+    "            \"description\": \"情感分析结果\",\n",
														
 
															+    "            \"parameters\": {\n",
														
 
															+    "                \"type\": \"object\",\n",
														
 
															+    "                \"properties\": {\n",
														
 
															+    "                    \"sentiment\": {\n",
														
 
															+    "                        \"type\": \"string\", \n",
														
 
															+    "                        \"enum\": [\"positive\", \"negative\"],\n",
														
 
															+    "                        \"description\": \"情感倾向\"\n",
														
 
															+    "                    }\n",
														
 
															+    "                },\n",
														
 
															+    "                \"required\": [\"sentiment\"]\n",
														
 
															+    "            }\n",
														
 
															+    "        }\n",
														
 
															+    "    }]\n",
														
 
															+    "    \n",
														
 
															+    "    for _ in range(max_retries):\n",
														
 
															+    "        try:\n",
														
 
															+    "            response = client.chat.completions.create(\n",
														
 
															+    "                model=\"qwen3-4b\",\n",
														
 
															+    "                messages=[{\"role\": \"user\", \"content\": prompt}],\n",
														
 
															+    "                tools=tools,\n",
														
 
															+    "                tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
														
 
															+    "                temperature=0,\n",
														
 
															+    "                extra_body={\"enable_thinking\": False}\n",
														
 
															+    "            )\n",
														
 
															+    "            args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
														
 
															+    "            return SentimentRequest(**args).sentiment\n",
														
 
															+    "        except:\n",
														
 
															+    "            time.sleep(1)\n",
														
 
															+    "    return \"error\"\n"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 6,
														
 
															+   "id": "d0fa114e",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [
														
 
															+    {
														
 
															+     "name": "stdout",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "成功加载 21 条样本\n"
														
 
															+     ]
														
 
															+    },
														
 
															+    {
														
 
															+     "name": "stderr",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "预测进度: 100%|██████████| 21/21 [00:57<00:00,  2.72s/it]"
														
 
															+     ]
														
 
															+    },
														
 
															+    {
														
 
															+     "name": "stdout",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "预测结果已保存至：./prediction_results.csv\n",
														
 
															+      "text模式准确率：90.48%\n",
														
 
															+      "json模式准确率：90.48%\n",
														
 
															+      "tool模式准确率：85.71%\n"
														
 
															+     ]
														
 
															+    },
														
 
															+    {
														
 
															+     "name": "stderr",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "\n"
														
 
															+     ]
														
 
															+    }
														
 
															+   ],
														
 
															+   "source": [
														
 
															+    "def main():\n",
														
 
															+    "    # 初始化客户端\n",
														
 
															+    "    client = OpenAI(\n",
														
 
															+    "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
														
 
															+    "        api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
														
 
															+    "    )\n",
														
 
															+    "    \n",
														
 
															+    "    # 加载数据\n",
														
 
															+    "    samples = load_dataset()\n",
														
 
															+    "    print(f\"成功加载 {len(samples)} 条样本\")\n",
														
 
															+    "    \n",
														
 
															+    "    # 执行预测\n",
														
 
															+    "    results = []\n",
														
 
															+    "    for sample in tqdm(samples, desc=\"预测进度\"):\n",
														
 
															+    "        try:\n",
														
 
															+    "            pred_text = predict_text(client, sample[\"text\"])\n",
														
 
															+    "            pred_json = predict_json(client, sample[\"text\"])\n",
														
 
															+    "            pred_tool = predict_tool(client, sample[\"text\"])\n",
														
 
															+    "            \n",
														
 
															+    "            results.append({\n",
														
 
															+    "                \"text\": sample[\"text\"],\n",
														
 
															+    "                \"true_label\": sample[\"true_label\"],\n",
														
 
															+    "                \"text_pred\": pred_text,\n",
														
 
															+    "                \"json_pred\": pred_json,\n",
														
 
															+    "                \"tool_pred\": pred_tool\n",
														
 
															+    "            })\n",
														
 
															+    "        except Exception as e:\n",
														
 
															+    "            print(f\"预测失败：{str(e)}\")\n",
														
 
															+    "    \n",
														
 
															+    "    # 保存结果\n",
														
 
															+    "    df = pd.DataFrame(results)\n",
														
 
															+    "    df.to_csv(SAVE_PATH, index=False)\n",
														
 
															+    "    print(f\"预测结果已保存至：{SAVE_PATH}\")\n",
														
 
															+    "    \n",
														
 
															+    "    # 计算指标\n",
														
 
															+    "    for mode in [\"text_pred\",\"json_pred\",\"tool_pred\"]:\n",
														
 
															+    "        y_true = df[\"true_label\"]\n",
														
 
															+    "        y_pred = df[mode]\n",
														
 
															+    "        accuracy = accuracy_score(y_true, y_pred)\n",
														
 
															+    "        print(f\"{mode.split('_')[0]}模式准确率：{accuracy:.2%}\")\n",
														
 
															+    "if __name__ == \"__main__\":\n",
														
 
															+    "    main()"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": null,
														
 
															+   "id": "f6048b3a",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "# 新单元格1：定义独立函数\n",
														
 
															+    "from tqdm.notebook import tqdm\n",
														
 
															+    "from openai import OpenAI\n",
														
 
															+    "import pandas as pd\n",
														
 
															+    "\n",
														
 
															+    "def compare_models():\n",
														
 
															+    "    \"\"\"独立模型比较函数\"\"\"\n",
														
 
															+    "    # 初始化独立客户端\n",
														
 
															+    "    local_client = OpenAI(\n",
														
 
															+    "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
														
 
															+    "        api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
														
 
															+    "    )\n",
														
 
															+    "    \n",
														
 
															+    "    # 独立模型列表\n",
														
 
															+    "    MODEL_COMPARE = [\n",
														
 
															+    "        \"qwen3-32b\",\n",
														
 
															+    "        \"qwen3-30b-a3b\",\n",
														
 
															+    "        \"qwen3-0.6b\"\n",
														
 
															+    "    ]\n",
														
 
															+    "    \n",
														
 
															+    "    def local_predict(text, model_name, max_retries=3):\n",
														
 
															+    "        \"\"\"独立预测函数\"\"\"\n",
														
 
															+    "        prompt = f\"判断情感倾向（仅输出positive/negative）：\\n{text}\"\n",
														
 
															+    "        for _ in range(max_retries):\n",
														
 
															+    "            try:\n",
														
 
															+    "                response = local_client.chat.completions.create(\n",
														
 
															+    "                    model=model_name,\n",
														
 
															+    "                    messages=[{\"role\": \"user\", \"content\": prompt}],\n",
														
 
															+    "                    temperature=0,\n",
														
 
															+    "                    extra_body={\"enable_thinking\": False}\n",
														
 
															+    "                )\n",
														
 
															+    "                return response.choices[0].message.content.strip().lower()\n",
														
 
															+    "            except Exception:\n",
														
 
															+    "                time.sleep(1)\n",
														
 
															+    "        return \"error\"\n",
														
 
															+    "    \n",
														
 
															+    "    # 独立数据加载\n",
														
 
															+    "    local_samples = load_dataset()  # 复用已有加载函数\n",
														
 
															+    "    \n",
														
 
															+    "    # 执行比较\n",
														
 
															+    "    report = []\n",
														
 
															+    "    for model in MODEL_COMPARE:\n",
														
 
															+    "        correct = 0\n",
														
 
															+    "        for sample in tqdm(local_samples, desc=f\"Testing {model}\"):\n",
														
 
															+    "            pred = local_predict(sample[\"text\"], model)\n",
														
 
															+    "            if pred == sample[\"true_label\"]:\n",
														
 
															+    "                correct += 1\n",
														
 
															+    "        report.append({\n",
														
 
															+    "            \"Model\": model,\n",
														
 
															+    "            \"Accuracy\": f\"{correct/len(local_samples):.2%}\",\n",
														
 
															+    "            \"Params\": model.split(\"-\")[1].upper()\n",
														
 
															+    "        })\n",
														
 
															+    "    \n",
														
 
															+    "    # 显示结果\n",
														
 
															+    "    return pd.DataFrame(report)"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 12,
														
 
															+   "id": "6e69bbb2",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "application/vnd.jupyter.widget-view+json": {
														
 
															+       "model_id": "8f226078caad47a89f695eb7180f2e27",
														
 
															+       "version_major": 2,
														
 
															+       "version_minor": 0
														
 
															+      },
														
 
															+      "text/plain": [
														
 
															+       "Testing qwen3-32b:   0%|          | 0/21 [00:00<?, ?it/s]"
														
 
															+      ]
														
 
															+     },
														
 
															+     "metadata": {},
														
 
															+     "output_type": "display_data"
														
 
															+    },
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "application/vnd.jupyter.widget-view+json": {
														
 
															+       "model_id": "729126a4fed14b558c36947f65d8d6c0",
														
 
															+       "version_major": 2,
														
 
															+       "version_minor": 0
														
 
															+      },
														
 
															+      "text/plain": [
														
 
															+       "Testing qwen3-30b-a3b:   0%|          | 0/21 [00:00<?, ?it/s]"
														
 
															+      ]
														
 
															+     },
														
 
															+     "metadata": {},
														
 
															+     "output_type": "display_data"
														
 
															+    },
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "application/vnd.jupyter.widget-view+json": {
														
 
															+       "model_id": "b263d76ce04749b19e8d918b528a5fdb",
														
 
															+       "version_major": 2,
														
 
															+       "version_minor": 0
														
 
															+      },
														
 
															+      "text/plain": [
														
 
															+       "Testing qwen3-0.6b:   0%|          | 0/21 [00:00<?, ?it/s]"
														
 
															+      ]
														
 
															+     },
														
 
															+     "metadata": {},
														
 
															+     "output_type": "display_data"
														
 
															+    },
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "text/html": [
														
 
															+       "<style type=\"text/css\">\n",
														
 
															+       "</style>\n",
														
 
															+       "<table id=\"T_e2e5a\">\n",
														
 
															+       "  <thead>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th id=\"T_e2e5a_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
														
 
															+       "      <th id=\"T_e2e5a_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
														
 
															+       "      <th id=\"T_e2e5a_level0_col2\" class=\"col_heading level0 col2\" >Params</th>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </thead>\n",
														
 
															+       "  <tbody>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <td id=\"T_e2e5a_row0_col0\" class=\"data row0 col0\" >qwen3-32b</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row0_col1\" class=\"data row0 col1\" >90.48%</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row0_col2\" class=\"data row0 col2\" >32B</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <td id=\"T_e2e5a_row1_col0\" class=\"data row1 col0\" >qwen3-30b-a3b</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row1_col1\" class=\"data row1 col1\" >85.71%</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row1_col2\" class=\"data row1 col2\" >30B</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <td id=\"T_e2e5a_row2_col0\" class=\"data row2 col0\" >qwen3-0.6b</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row2_col1\" class=\"data row2 col1\" >47.62%</td>\n",
														
 
															+       "      <td id=\"T_e2e5a_row2_col2\" class=\"data row2 col2\" >0.6B</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </tbody>\n",
														
 
															+       "</table>\n"
														
 
															+      ],
														
 
															+      "text/plain": [
														
 
															+       "<pandas.io.formats.style.Styler at 0x1c4fb54bd90>"
														
 
															+      ]
														
 
															+     },
														
 
															+     "execution_count": 12,
														
 
															+     "metadata": {},
														
 
															+     "output_type": "execute_result"
														
 
															+    }
														
 
															+   ],
														
 
															+   "source": [
														
 
															+    "result_df = compare_models()\n",
														
 
															+    "result_df.style.hide(axis=\"index\").format(precision=2)"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 32,
														
 
															+   "id": "e772335c",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "def analyze_with_reason():\n",
														
 
															+    "    \"\"\"使用qwen3-0.6b进行带原因的情感分析\"\"\"\n",
														
 
															+    "    client = OpenAI(\n",
														
 
															+    "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
														
 
															+    "        api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
														
 
															+    "    )\n",
														
 
															+    "    \n",
														
 
															+    "    # 自定义工具定义\n",
														
 
															+    "    tools = [{\n",
														
 
															+    "        \"type\": \"function\",\n",
														
 
															+    "        \"function\": {\n",
														
 
															+    "            \"name\": \"sentiment_analysis\",\n",
														
 
															+    "            \"description\": \"情感分析结果及原因\",\n",
														
 
															+    "            \"parameters\": {\n",
														
 
															+    "                \"type\": \"object\",\n",
														
 
															+    "                \"properties\": {\n",
														
 
															+    "                    \"sentiment\": {\n",
														
 
															+    "                        \"type\": \"string\", \n",
														
 
															+    "                        \"enum\": [\"positive\", \"negative\"],\n",
														
 
															+    "                        \"description\": \"情感倾向\"\n",
														
 
															+    "                    },\n",
														
 
															+    "                    \"reason\": {\n",
														
 
															+    "                        \"type\": \"string\",\n",
														
 
															+    "                        \"description\": \"分析原因\"\n",
														
 
															+    "                    }\n",
														
 
															+    "                },\n",
														
 
															+    "                \"required\": [\"sentiment\", \"reason\"]\n",
														
 
															+    "            }\n",
														
 
															+    "        }\n",
														
 
															+    "    }]\n",
														
 
															+    "    \n",
														
 
															+    "    # 加载数据\n",
														
 
															+    "    samples = load_dataset()\n",
														
 
															+    "    error_cases = []\n",
														
 
															+    "    \n",
														
 
															+    "    for sample in tqdm(samples, desc=\"分析进度\"):\n",
														
 
															+    "        try:\n",
														
 
															+    "            # 工具调用预测\n",
														
 
															+    "            response = client.chat.completions.create(\n",
														
 
															+    "                model=\"qwen3-0.6b\",\n",
														
 
															+    "                messages=[{\n",
														
 
															+    "                    \"role\": \"user\", \n",
														
 
															+    "                    \"content\": f\"请分析以下影评情感并说明原因：\\n{sample['text']}\"\n",
														
 
															+    "                }],\n",
														
 
															+    "                tools=tools,\n",
														
 
															+    "                tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
														
 
															+    "                temperature=0,\n",
														
 
															+    "                extra_body={\"enable_thinking\": False}\n",
														
 
															+    "            )\n",
														
 
															+    "            \n",
														
 
															+    "            # 解析结果\n",
														
 
															+    "            args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
														
 
															+    "            pred = args[\"sentiment\"]\n",
														
 
															+    "            reason = args[\"reason\"]\n",
														
 
															+    "            \n",
														
 
															+    "            # 记录错误案例\n",
														
 
															+    "            if pred != sample[\"true_label\"]:\n",
														
 
															+    "                error_cases.append({\n",
														
 
															+    "                    \"text\": sample[\"text\"],\n",
														
 
															+    "                    \"true_label\": sample[\"true_label\"],\n",
														
 
															+    "                    \"prediction\": pred,\n",
														
 
															+    "                    \"reason\": reason\n",
														
 
															+    "                })\n",
														
 
															+    "                \n",
														
 
															+    "        except Exception as e:\n",
														
 
															+    "            print(f\"分析失败：{str(e)}\")\n",
														
 
															+    "            continue\n",
														
 
															+    "    \n",
														
 
															+    "    # 保存错误案例\n",
														
 
															+    "    error_df = pd.DataFrame(error_cases)\n",
														
 
															+    "    error_df.to_csv(\"./error_analysis.csv\", index=False)\n",
														
 
															+    "    return error_df\n"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 33,
														
 
															+   "id": "e4fd9997",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "application/vnd.jupyter.widget-view+json": {
														
 
															+       "model_id": "51954bc8a0624b2c8e0e0100edae6fc3",
														
 
															+       "version_major": 2,
														
 
															+       "version_minor": 0
														
 
															+      },
														
 
															+      "text/plain": [
														
 
															+       "分析进度:   0%|          | 0/21 [00:00<?, ?it/s]"
														
 
															+      ]
														
 
															+     },
														
 
															+     "metadata": {},
														
 
															+     "output_type": "display_data"
														
 
															+    },
														
 
															+    {
														
 
															+     "name": "stdout",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "分析失败：'sentiment'\n",
														
 
															+      "发现 4 个错误案例\n"
														
 
															+     ]
														
 
															+    },
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "text/html": [
														
 
															+       "<div>\n",
														
 
															+       "<style scoped>\n",
														
 
															+       "    .dataframe tbody tr th:only-of-type {\n",
														
 
															+       "        vertical-align: middle;\n",
														
 
															+       "    }\n",
														
 
															+       "\n",
														
 
															+       "    .dataframe tbody tr th {\n",
														
 
															+       "        vertical-align: top;\n",
														
 
															+       "    }\n",
														
 
															+       "\n",
														
 
															+       "    .dataframe thead th {\n",
														
 
															+       "        text-align: right;\n",
														
 
															+       "    }\n",
														
 
															+       "</style>\n",
														
 
															+       "<table border=\"1\" class=\"dataframe\">\n",
														
 
															+       "  <thead>\n",
														
 
															+       "    <tr style=\"text-align: right;\">\n",
														
 
															+       "      <th></th>\n",
														
 
															+       "      <th>text</th>\n",
														
 
															+       "      <th>true_label</th>\n",
														
 
															+       "      <th>prediction</th>\n",
														
 
															+       "      <th>reason</th>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </thead>\n",
														
 
															+       "  <tbody>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>0</th>\n",
														
 
															+       "      <td>How many movies are there that you can think o...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>The movie borrows so much from other movies, l...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>1</th>\n",
														
 
															+       "      <td>I only went to see this movie because I have a...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>The movie missed some acting moments, scenes w...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>2</th>\n",
														
 
															+       "      <td>I'm a Petty Officer 1st Class (E-6) and have b...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>The movie portrayed the Coast Guard in a way t...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>3</th>\n",
														
 
															+       "      <td>Wow, another Kevin Costner hero movie. Postman...</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>The review highlights Kevin Costner's consiste...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </tbody>\n",
														
 
															+       "</table>\n",
														
 
															+       "</div>"
														
 
															+      ],
														
 
															+      "text/plain": [
														
 
															+       "                                                text true_label prediction  \\\n",
														
 
															+       "0  How many movies are there that you can think o...   positive   negative   \n",
														
 
															+       "1  I only went to see this movie because I have a...   positive   negative   \n",
														
 
															+       "2  I'm a Petty Officer 1st Class (E-6) and have b...   positive   negative   \n",
														
 
															+       "3  Wow, another Kevin Costner hero movie. Postman...   negative   positive   \n",
														
 
															+       "\n",
														
 
															+       "                                              reason  \n",
														
 
															+       "0  The movie borrows so much from other movies, l...  \n",
														
 
															+       "1  The movie missed some acting moments, scenes w...  \n",
														
 
															+       "2  The movie portrayed the Coast Guard in a way t...  \n",
														
 
															+       "3  The review highlights Kevin Costner's consiste...  "
														
 
															+      ]
														
 
															+     },
														
 
															+     "execution_count": 33,
														
 
															+     "metadata": {},
														
 
															+     "output_type": "execute_result"
														
 
															+    }
														
 
															+   ],
														
 
															+   "source": [
														
 
															+    "error_results = analyze_with_reason()\n",
														
 
															+    "print(f\"发现 {len(error_results)} 个错误案例\")\n",
														
 
															+    "error_results.head()"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": null,
														
 
															+   "id": "cbb2aba7",
														
 
															+   "metadata": {},
														
 
															+   "outputs": [
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "application/vnd.jupyter.widget-view+json": {
														
 
															+       "model_id": "eec3adfdc2424234b2df5e2e2d78a7b2",
														
 
															+       "version_major": 2,
														
 
															+       "version_minor": 0
														
 
															+      },
														
 
															+      "text/plain": [
														
 
															+       "分析进度:   0%|          | 0/21 [00:00<?, ?it/s]"
														
 
															+      ]
														
 
															+     },
														
 
															+     "metadata": {},
														
 
															+     "output_type": "display_data"
														
 
															+    },
														
 
															+    {
														
 
															+     "name": "stdout",
														
 
															+     "output_type": "stream",
														
 
															+     "text": [
														
 
															+      "发现 5 个错误案例\n"
														
 
															+     ]
														
 
															+    },
														
 
															+    {
														
 
															+     "data": {
														
 
															+      "text/html": [
														
 
															+       "<div>\n",
														
 
															+       "<style scoped>\n",
														
 
															+       "    .dataframe tbody tr th:only-of-type {\n",
														
 
															+       "        vertical-align: middle;\n",
														
 
															+       "    }\n",
														
 
															+       "\n",
														
 
															+       "    .dataframe tbody tr th {\n",
														
 
															+       "        vertical-align: top;\n",
														
 
															+       "    }\n",
														
 
															+       "\n",
														
 
															+       "    .dataframe thead th {\n",
														
 
															+       "        text-align: right;\n",
														
 
															+       "    }\n",
														
 
															+       "</style>\n",
														
 
															+       "<table border=\"1\" class=\"dataframe\">\n",
														
 
															+       "  <thead>\n",
														
 
															+       "    <tr style=\"text-align: right;\">\n",
														
 
															+       "      <th></th>\n",
														
 
															+       "      <th>text</th>\n",
														
 
															+       "      <th>true_label</th>\n",
														
 
															+       "      <th>prediction</th>\n",
														
 
															+       "      <th>reason</th>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </thead>\n",
														
 
															+       "  <tbody>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>0</th>\n",
														
 
															+       "      <td>How many movies are there that you can think o...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>Borrowing so much from multiple movies creates...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>1</th>\n",
														
 
															+       "      <td>I only went to see this movie because I have a...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>Although some aspects of the movie have positi...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>2</th>\n",
														
 
															+       "      <td>I'm a Petty Officer 1st Class (E-6) and have b...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>The sentiment is negative.</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>3</th>\n",
														
 
															+       "      <td>I've seen this story before but my kids haven'...</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>mixed</td>\n",
														
 
															+       "      <td>the review contains both positive and negative...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "    <tr>\n",
														
 
															+       "      <th>4</th>\n",
														
 
															+       "      <td>Wow, another Kevin Costner hero movie. Postman...</td>\n",
														
 
															+       "      <td>negative</td>\n",
														
 
															+       "      <td>positive</td>\n",
														
 
															+       "      <td>the movie is highlighted by its relatability, ...</td>\n",
														
 
															+       "    </tr>\n",
														
 
															+       "  </tbody>\n",
														
 
															+       "</table>\n",
														
 
															+       "</div>"
														
 
															+      ],
														
 
															+      "text/plain": [
														
 
															+       "                                                text true_label prediction  \\\n",
														
 
															+       "0  How many movies are there that you can think o...   positive   negative   \n",
														
 
															+       "1  I only went to see this movie because I have a...   positive   negative   \n",
														
 
															+       "2  I'm a Petty Officer 1st Class (E-6) and have b...   positive   negative   \n",
														
 
															+       "3  I've seen this story before but my kids haven'...   positive      mixed   \n",
														
 
															+       "4  Wow, another Kevin Costner hero movie. Postman...   negative   positive   \n",
														
 
															+       "\n",
														
 
															+       "                                              reason  \n",
														
 
															+       "0  Borrowing so much from multiple movies creates...  \n",
														
 
															+       "1  Although some aspects of the movie have positi...  \n",
														
 
															+       "2                         The sentiment is negative.  \n",
														
 
															+       "3  the review contains both positive and negative...  \n",
														
 
															+       "4  the movie is highlighted by its relatability, ...  "
														
 
															+      ]
														
 
															+     },
														
 
															+     "execution_count": 31,
														
 
															+     "metadata": {},
														
 
															+     "output_type": "execute_result"
														
 
															+    }
														
 
															+   ],
														
 
															+   "source": [
														
 
															+    "#修改temperature查看结果,从0修改至1.9\n",
														
 
															+    "error_results = analyze_with_reason()\n",
														
 
															+    "print(f\"发现 {len(error_results)} 个错误案例\")\n",
														
 
															+    "error_results.head()"
														
 
															+   ]
														
 
															+  }
														
 
															+ ],
														
 
															+ "metadata": {
														
 
															+  "kernelspec": {
														
 
															+   "display_name": "base",
														
 
															+   "language": "python",
														
 
															+   "name": "python3"
														
 
															+  },
														
 
															+  "language_info": {
														
 
															+   "codemirror_mode": {
														
 
															+    "name": "ipython",
														
 
															+    "version": 3
														
 
															+   },
														
 
															+   "file_extension": ".py",
														
 
															+   "mimetype": "text/x-python",
														
 
															+   "name": "python",
														
 
															+   "nbconvert_exporter": "python",
														
 
															+   "pygments_lexer": "ipython3",
														
 
															+   "version": "3.13.5"
														
 
															+  }
														
 
															+ },
														
 
															+ "nbformat": 4,
														
 
															+ "nbformat_minor": 5
														
 
															+}