|
@@ -0,0 +1,724 @@
|
|
|
|
+{
|
|
|
|
+ "cells": [
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 1,
|
|
|
|
+ "id": "691a012b",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "from openai import OpenAI\n",
|
|
|
|
+ "from dotenv import load_dotenv \n",
|
|
|
|
+ "import glob\n",
|
|
|
|
+ "import json\n",
|
|
|
|
+ "import os\n",
|
|
|
|
+ "import time\n",
|
|
|
|
+ "from pydantic import BaseModel\n",
|
|
|
|
+ "from openai import BadRequestError\n",
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
+ "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
|
|
|
|
+ "from tqdm import tqdm\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "# Load environment variables (API endpoint and key) from a local .env file.\n",
+ "load_dotenv()\n",
+ "\n",
+ "# Module-level OpenAI client; base URL and API key are read from the environment.\n",
+ "client = OpenAI(\n",
+ "    base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
+ "    api_key=os.getenv(\"BAILIAN_API_KEY\"),\n",
+ ")\n",
+ "\n",
+ "# Dataset root; load_dataset() reads its positive/ and negative/ sub-directories.\n",
+ "# The default is machine-specific, so it can be overridden without editing the\n",
+ "# notebook by setting the SENTIMENT_DATA_ROOT environment variable.\n",
+ "DATA_ROOT = os.getenv(\n",
+ "    \"SENTIMENT_DATA_ROOT\",\n",
+ "    \"D:/ai_learning-master/ai_learning/data/acllmdb_sentiment_small\",\n",
+ ")\n",
+ "# CSV file that receives the per-sample predictions written by main().\n",
+ "SAVE_PATH = \"./prediction_results.csv\"\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "class SentimentRequest(BaseModel):\n",
|
|
|
|
+ " \"\"\"Validated shape of a model reply: one string field named ``sentiment``.\"\"\"\n",
+ " sentiment: str\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "id": "fe312896",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "def load_dataset():\n",
+ "    \"\"\"Load the labelled review samples found under DATA_ROOT.\n",
+ "\n",
+ "    Every ``*.txt`` file in the ``positive`` and ``negative`` sub-directories\n",
+ "    of DATA_ROOT is read as UTF-8. Files are visited in sorted order so that\n",
+ "    repeated runs yield the samples in the same sequence.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list of dicts with the keys ``text`` (the file content) and\n",
+ "        ``true_label`` (the name of the sub-directory the file came from).\n",
+ "    \"\"\"\n",
+ "    samples = []\n",
+ "    for label in (\"positive\", \"negative\"):\n",
+ "        pattern = os.path.join(DATA_ROOT, label, \"*.txt\")\n",
+ "        # glob returns matches in arbitrary order; sort for reproducibility.\n",
+ "        for file_path in sorted(glob.glob(pattern)):\n",
+ "            with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
+ "                samples.append({\"text\": f.read(), \"true_label\": label})\n",
+ "    return samples"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 5,
|
|
|
|
+ "id": "0cfb9303",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "def predict_text(client, text, max_retries=3):\n",
+ "    \"\"\"Classify a review by asking the model for a bare one-word answer.\n",
+ "\n",
+ "    Args:\n",
+ "        client: Object exposing ``chat.completions.create``.\n",
+ "        text: Review text appended to the prompt.\n",
+ "        max_retries: Number of attempts made before giving up.\n",
+ "\n",
+ "    Returns:\n",
+ "        The reply content, stripped and lower-cased, or ``\"error\"`` when\n",
+ "        every attempt raised ``BadRequestError``. Other exceptions propagate.\n",
+ "    \"\"\"\n",
+ "    prompt = f\"\"\"请判断以下影评的情感倾向,严格只输出英文单词positive或negative:\n",
+ "{text}\"\"\"\n",
+ "\n",
+ "    # Request arguments are identical for every attempt.\n",
+ "    request = {\n",
+ "        \"model\": \"qwen3-4b\",\n",
+ "        \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n",
+ "        \"temperature\": 0,\n",
+ "        \"extra_body\": {\"enable_thinking\": False},\n",
+ "    }\n",
+ "\n",
+ "    for _attempt in range(max_retries):\n",
+ "        try:\n",
+ "            reply = client.chat.completions.create(**request)\n",
+ "            answer = reply.choices[0].message.content\n",
+ "            return answer.strip().lower()\n",
+ "        except BadRequestError:\n",
+ "            # Pause for a second before the next attempt.\n",
+ "            time.sleep(1)\n",
+ "    return \"error\"\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "def predict_json(client, text, max_retries=3):\n",
+ "    \"\"\"Classify a review using the API's JSON-object response mode.\n",
+ "\n",
+ "    The reply content is parsed with ``json.loads`` and validated through\n",
+ "    ``SentimentRequest``. Any failure along the way (API error, invalid JSON,\n",
+ "    missing ``sentiment`` key) counts as a failed attempt and is retried.\n",
+ "\n",
+ "    Args:\n",
+ "        client: Object exposing ``chat.completions.create``.\n",
+ "        text: Review text appended to the prompt.\n",
+ "        max_retries: Number of attempts made before giving up.\n",
+ "\n",
+ "    Returns:\n",
+ "        The ``sentiment`` value of the parsed reply, or ``\"error\"`` when all\n",
+ "        attempts failed.\n",
+ "    \"\"\"\n",
+ "    prompt = f\"\"\"分析以下影评的情感倾向,使用JSON格式返回结果:\n",
+ "{text}\"\"\"\n",
+ "\n",
+ "    for _ in range(max_retries):\n",
+ "        try:\n",
+ "            response = client.chat.completions.create(\n",
+ "                model=\"qwen3-4b\",\n",
+ "                messages=[{\"role\": \"user\", \"content\": prompt}],\n",
+ "                response_format={\"type\": \"json_object\"},\n",
+ "                temperature=0,\n",
+ "                extra_body={\"enable_thinking\": False},\n",
+ "            )\n",
+ "            result = json.loads(response.choices[0].message.content)\n",
+ "            return SentimentRequest(**result).sentiment\n",
+ "        except Exception:\n",
+ "            # Deliberately broad: this is a best-effort call and any failure\n",
+ "            # simply triggers a retry. Unlike a bare ``except``, this lets\n",
+ "            # KeyboardInterrupt/SystemExit through so a run can be stopped.\n",
+ "            time.sleep(1)\n",
+ "    return \"error\"\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "def predict_tool(client, text, max_retries=3):\n",
+ "    \"\"\"Classify a review through a forced ``sentiment_analysis`` tool call.\n",
+ "\n",
+ "    The arguments of the first tool call are parsed with ``json.loads`` and\n",
+ "    validated through ``SentimentRequest``. Any failure (API error, missing\n",
+ "    tool call, invalid arguments) counts as a failed attempt and is retried.\n",
+ "\n",
+ "    Args:\n",
+ "        client: Object exposing ``chat.completions.create``.\n",
+ "        text: Review text appended to the prompt.\n",
+ "        max_retries: Number of attempts made before giving up.\n",
+ "\n",
+ "    Returns:\n",
+ "        The ``sentiment`` argument of the tool call, or ``\"error\"`` when all\n",
+ "        attempts failed.\n",
+ "    \"\"\"\n",
+ "    prompt = f\"\"\"请分析以下影评的情感倾向:\n",
+ "{text}\"\"\"\n",
+ "\n",
+ "    # Schema of the single tool the model is forced to call.\n",
+ "    tools = [{\n",
+ "        \"type\": \"function\",\n",
+ "        \"function\": {\n",
+ "            \"name\": \"sentiment_analysis\",\n",
+ "            \"description\": \"情感分析结果\",\n",
+ "            \"parameters\": {\n",
+ "                \"type\": \"object\",\n",
+ "                \"properties\": {\n",
+ "                    \"sentiment\": {\n",
+ "                        \"type\": \"string\",\n",
+ "                        \"enum\": [\"positive\", \"negative\"],\n",
+ "                        \"description\": \"情感倾向\"\n",
+ "                    }\n",
+ "                },\n",
+ "                \"required\": [\"sentiment\"]\n",
+ "            }\n",
+ "        }\n",
+ "    }]\n",
+ "\n",
+ "    for _ in range(max_retries):\n",
+ "        try:\n",
+ "            response = client.chat.completions.create(\n",
+ "                model=\"qwen3-4b\",\n",
+ "                messages=[{\"role\": \"user\", \"content\": prompt}],\n",
+ "                tools=tools,\n",
+ "                tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
+ "                temperature=0,\n",
+ "                extra_body={\"enable_thinking\": False},\n",
+ "            )\n",
+ "            args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
+ "            return SentimentRequest(**args).sentiment\n",
+ "        except Exception:\n",
+ "            # Deliberately broad: this is a best-effort call and any failure\n",
+ "            # simply triggers a retry. Unlike a bare ``except``, this lets\n",
+ "            # KeyboardInterrupt/SystemExit through so a run can be stopped.\n",
+ "            time.sleep(1)\n",
+ "    return \"error\"\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 6,
|
|
|
|
+ "id": "d0fa114e",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "成功加载 21 条样本\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "预测进度: 100%|██████████| 21/21 [00:57<00:00, 2.72s/it]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "预测结果已保存至:./prediction_results.csv\n",
|
|
|
|
+ "text模式准确率:90.48%\n",
|
|
|
|
+ "json模式准确率:90.48%\n",
|
|
|
|
+ "tool模式准确率:85.71%\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stderr",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "\n"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "source": [
|
|
|
|
+ "def main():\n",
+ "    \"\"\"Run the three prediction modes over the dataset and report accuracy.\n",
+ "\n",
+ "    Each sample is classified with predict_text, predict_json and\n",
+ "    predict_tool. The predictions are written to SAVE_PATH as CSV and the\n",
+ "    accuracy of every mode is printed. A sample whose prediction raises is\n",
+ "    reported and left out of the results.\n",
+ "    \"\"\"\n",
+ "    # Initialise the client\n",
+ "    client = OpenAI(\n",
+ "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
+ "        api_key=os.getenv(\"BAILIAN_API_KEY\"),\n",
+ "    )\n",
+ "\n",
+ "    # Load the data\n",
+ "    samples = load_dataset()\n",
+ "    print(f\"成功加载 {len(samples)} 条样本\")\n",
+ "\n",
+ "    # Run the predictions\n",
+ "    results = []\n",
+ "    for sample in tqdm(samples, desc=\"预测进度\"):\n",
+ "        try:\n",
+ "            pred_text = predict_text(client, sample[\"text\"])\n",
+ "            pred_json = predict_json(client, sample[\"text\"])\n",
+ "            pred_tool = predict_tool(client, sample[\"text\"])\n",
+ "        except Exception as e:\n",
+ "            print(f\"预测失败:{str(e)}\")\n",
+ "        else:\n",
+ "            results.append({\n",
+ "                \"text\": sample[\"text\"],\n",
+ "                \"true_label\": sample[\"true_label\"],\n",
+ "                \"text_pred\": pred_text,\n",
+ "                \"json_pred\": pred_json,\n",
+ "                \"tool_pred\": pred_tool,\n",
+ "            })\n",
+ "\n",
+ "    # Save the results\n",
+ "    df = pd.DataFrame(results)\n",
+ "    df.to_csv(SAVE_PATH, index=False)\n",
+ "    print(f\"预测结果已保存至:{SAVE_PATH}\")\n",
+ "\n",
+ "    # A frame built from an empty list has no columns, so the lookups below\n",
+ "    # would raise KeyError; stop with a message instead.\n",
+ "    if df.empty:\n",
+ "        print(\"没有可用的预测结果,跳过准确率计算\")\n",
+ "        return\n",
+ "\n",
+ "    # Compute the metrics\n",
+ "    y_true = df[\"true_label\"]\n",
+ "    for mode in [\"text_pred\", \"json_pred\", \"tool_pred\"]:\n",
+ "        accuracy = accuracy_score(y_true, df[mode])\n",
+ "        print(f\"{mode.split('_')[0]}模式准确率:{accuracy:.2%}\")\n",
+ "\n",
+ "\n",
+ "if __name__ == \"__main__\":\n",
+ "    main()"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "id": "f6048b3a",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# 新单元格1:定义独立函数\n",
|
|
|
|
+ "from tqdm.notebook import tqdm\n",
|
|
|
|
+ "from openai import OpenAI\n",
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
+ "\n",
|
|
|
|
+ "def compare_models(models=None):\n",
+ "    \"\"\"Compare plain-text sentiment accuracy across several models.\n",
+ "\n",
+ "    Every sample from load_dataset() is sent to each model; a reply counts as\n",
+ "    correct only when it equals the sample's true label exactly.\n",
+ "\n",
+ "    Args:\n",
+ "        models: Optional iterable of model names to evaluate. Defaults to\n",
+ "            ``qwen3-32b``, ``qwen3-30b-a3b`` and ``qwen3-0.6b``.\n",
+ "\n",
+ "    Returns:\n",
+ "        A DataFrame with one row per model and the columns ``Model``,\n",
+ "        ``Accuracy`` (formatted percentage) and ``Params`` (the second\n",
+ "        dash-separated part of the model name, upper-cased; empty when the\n",
+ "        name contains no dash).\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: If load_dataset() returns no samples.\n",
+ "    \"\"\"\n",
+ "    # Dedicated client for this comparison\n",
+ "    local_client = OpenAI(\n",
+ "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
+ "        api_key=os.getenv(\"BAILIAN_API_KEY\"),\n",
+ "    )\n",
+ "\n",
+ "    # Models to compare\n",
+ "    if models is None:\n",
+ "        models = (\"qwen3-32b\", \"qwen3-30b-a3b\", \"qwen3-0.6b\")\n",
+ "\n",
+ "    def local_predict(text, model_name, max_retries=3):\n",
+ "        \"\"\"Return the stripped, lower-cased reply of model_name, or \"error\".\"\"\"\n",
+ "        prompt = f\"判断情感倾向(仅输出positive/negative):\\n{text}\"\n",
+ "        for _ in range(max_retries):\n",
+ "            try:\n",
+ "                response = local_client.chat.completions.create(\n",
+ "                    model=model_name,\n",
+ "                    messages=[{\"role\": \"user\", \"content\": prompt}],\n",
+ "                    temperature=0,\n",
+ "                    extra_body={\"enable_thinking\": False},\n",
+ "                )\n",
+ "                return response.choices[0].message.content.strip().lower()\n",
+ "            except Exception:\n",
+ "                # Best-effort call: wait a second, then retry.\n",
+ "                time.sleep(1)\n",
+ "        return \"error\"\n",
+ "\n",
+ "    # Load the data (reuses the existing loader)\n",
+ "    local_samples = load_dataset()\n",
+ "    if not local_samples:\n",
+ "        # The accuracy below would otherwise fail with ZeroDivisionError.\n",
+ "        raise ValueError(\"load_dataset() returned no samples; check DATA_ROOT\")\n",
+ "\n",
+ "    # Run the comparison\n",
+ "    report = []\n",
+ "    for model in models:\n",
+ "        correct = 0\n",
+ "        for sample in tqdm(local_samples, desc=f\"Testing {model}\"):\n",
+ "            if local_predict(sample[\"text\"], model) == sample[\"true_label\"]:\n",
+ "                correct += 1\n",
+ "        # The size tag is the second dash-separated part, e.g. \"32b\" in \"qwen3-32b\".\n",
+ "        name_parts = model.split(\"-\")\n",
+ "        report.append({\n",
+ "            \"Model\": model,\n",
+ "            \"Accuracy\": f\"{correct/len(local_samples):.2%}\",\n",
+ "            \"Params\": name_parts[1].upper() if len(name_parts) > 1 else \"\",\n",
+ "        })\n",
+ "\n",
+ "    # Hand the report back for display\n",
+ "    return pd.DataFrame(report)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 12,
|
|
|
|
+ "id": "6e69bbb2",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "8f226078caad47a89f695eb7180f2e27",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "Testing qwen3-32b: 0%| | 0/21 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "729126a4fed14b558c36947f65d8d6c0",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "Testing qwen3-30b-a3b: 0%| | 0/21 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "b263d76ce04749b19e8d918b528a5fdb",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "Testing qwen3-0.6b: 0%| | 0/21 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "text/html": [
|
|
|
|
+ "<style type=\"text/css\">\n",
|
|
|
|
+ "</style>\n",
|
|
|
|
+ "<table id=\"T_e2e5a\">\n",
|
|
|
|
+ " <thead>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th id=\"T_e2e5a_level0_col0\" class=\"col_heading level0 col0\" >Model</th>\n",
|
|
|
|
+ " <th id=\"T_e2e5a_level0_col1\" class=\"col_heading level0 col1\" >Accuracy</th>\n",
|
|
|
|
+ " <th id=\"T_e2e5a_level0_col2\" class=\"col_heading level0 col2\" >Params</th>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </thead>\n",
|
|
|
|
+ " <tbody>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row0_col0\" class=\"data row0 col0\" >qwen3-32b</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row0_col1\" class=\"data row0 col1\" >90.48%</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row0_col2\" class=\"data row0 col2\" >32B</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row1_col0\" class=\"data row1 col0\" >qwen3-30b-a3b</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row1_col1\" class=\"data row1 col1\" >85.71%</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row1_col2\" class=\"data row1 col2\" >30B</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row2_col0\" class=\"data row2 col0\" >qwen3-0.6b</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row2_col1\" class=\"data row2 col1\" >47.62%</td>\n",
|
|
|
|
+ " <td id=\"T_e2e5a_row2_col2\" class=\"data row2 col2\" >0.6B</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </tbody>\n",
|
|
|
|
+ "</table>\n"
|
|
|
|
+ ],
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "<pandas.io.formats.style.Styler at 0x1c4fb54bd90>"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "execution_count": 12,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "execute_result"
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Run the model comparison and display the report without the index column.\n",
+ "result_df = compare_models()\n",
|
|
|
|
+ "result_df.style.hide(axis=\"index\").format(precision=2)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 32,
|
|
|
|
+ "id": "e772335c",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "def analyze_with_reason(model=\"qwen3-0.6b\", temperature=0):\n",
+ "    \"\"\"Collect the misclassified reviews together with the model's stated reason.\n",
+ "\n",
+ "    Every sample from load_dataset() is sent to the model with a forced\n",
+ "    ``sentiment_analysis`` tool call that asks for a sentiment and a reason.\n",
+ "    Samples whose predicted sentiment differs from the true label are\n",
+ "    collected, written to ``./error_analysis.csv`` and returned. A sample\n",
+ "    whose request or reply parsing fails is reported and skipped.\n",
+ "\n",
+ "    Args:\n",
+ "        model: Model name sent to the API.\n",
+ "        temperature: Sampling temperature sent to the API.\n",
+ "\n",
+ "    Returns:\n",
+ "        A DataFrame with one row per misclassified sample and the columns\n",
+ "        ``text``, ``true_label``, ``prediction`` and ``reason`` (no columns\n",
+ "        when nothing was misclassified).\n",
+ "    \"\"\"\n",
+ "    client = OpenAI(\n",
+ "        base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
+ "        api_key=os.getenv(\"BAILIAN_API_KEY\"),\n",
+ "    )\n",
+ "\n",
+ "    # Schema of the single tool the model is forced to call\n",
+ "    tools = [{\n",
+ "        \"type\": \"function\",\n",
+ "        \"function\": {\n",
+ "            \"name\": \"sentiment_analysis\",\n",
+ "            \"description\": \"情感分析结果及原因\",\n",
+ "            \"parameters\": {\n",
+ "                \"type\": \"object\",\n",
+ "                \"properties\": {\n",
+ "                    \"sentiment\": {\n",
+ "                        \"type\": \"string\",\n",
+ "                        \"enum\": [\"positive\", \"negative\"],\n",
+ "                        \"description\": \"情感倾向\"\n",
+ "                    },\n",
+ "                    \"reason\": {\n",
+ "                        \"type\": \"string\",\n",
+ "                        \"description\": \"分析原因\"\n",
+ "                    }\n",
+ "                },\n",
+ "                \"required\": [\"sentiment\", \"reason\"]\n",
+ "            }\n",
+ "        }\n",
+ "    }]\n",
+ "\n",
+ "    # Load the data\n",
+ "    samples = load_dataset()\n",
+ "    error_cases = []\n",
+ "\n",
+ "    for sample in tqdm(samples, desc=\"分析进度\"):\n",
+ "        try:\n",
+ "            # Predict through the forced tool call\n",
+ "            response = client.chat.completions.create(\n",
+ "                model=model,\n",
+ "                messages=[{\n",
+ "                    \"role\": \"user\",\n",
+ "                    \"content\": f\"请分析以下影评情感并说明原因:\\n{sample['text']}\"\n",
+ "                }],\n",
+ "                tools=tools,\n",
+ "                tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
+ "                temperature=temperature,\n",
+ "                extra_body={\"enable_thinking\": False},\n",
+ "            )\n",
+ "\n",
+ "            # Parse the reply. The model does not always honour the schema;\n",
+ "            # a missing key raises KeyError and is reported by the handler below.\n",
+ "            args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
+ "            pred = args[\"sentiment\"]\n",
+ "            reason = args[\"reason\"]\n",
+ "\n",
+ "            # Record the misclassified sample\n",
+ "            if pred != sample[\"true_label\"]:\n",
+ "                error_cases.append({\n",
+ "                    \"text\": sample[\"text\"],\n",
+ "                    \"true_label\": sample[\"true_label\"],\n",
+ "                    \"prediction\": pred,\n",
+ "                    \"reason\": reason,\n",
+ "                })\n",
+ "\n",
+ "        except Exception as e:\n",
+ "            # Best-effort: report the failure and move on to the next sample.\n",
+ "            print(f\"分析失败:{str(e)}\")\n",
+ "\n",
+ "    # Save the misclassified samples\n",
+ "    error_df = pd.DataFrame(error_cases)\n",
+ "    error_df.to_csv(\"./error_analysis.csv\", index=False)\n",
+ "    return error_df\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 33,
|
|
|
|
+ "id": "e4fd9997",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "51954bc8a0624b2c8e0e0100edae6fc3",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "分析进度: 0%| | 0/21 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "分析失败:'sentiment'\n",
|
|
|
|
+ "发现 4 个错误案例\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "text/html": [
|
|
|
|
+ "<div>\n",
|
|
|
|
+ "<style scoped>\n",
|
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
+ " vertical-align: middle;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "\n",
|
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
|
+ " vertical-align: top;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "\n",
|
|
|
|
+ " .dataframe thead th {\n",
|
|
|
|
+ " text-align: right;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "</style>\n",
|
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
+ " <thead>\n",
|
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
|
+ " <th></th>\n",
|
|
|
|
+ " <th>text</th>\n",
|
|
|
|
+ " <th>true_label</th>\n",
|
|
|
|
+ " <th>prediction</th>\n",
|
|
|
|
+ " <th>reason</th>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </thead>\n",
|
|
|
|
+ " <tbody>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>0</th>\n",
|
|
|
|
+ " <td>How many movies are there that you can think o...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>The movie borrows so much from other movies, l...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>1</th>\n",
|
|
|
|
+ " <td>I only went to see this movie because I have a...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>The movie missed some acting moments, scenes w...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>2</th>\n",
|
|
|
|
+ " <td>I'm a Petty Officer 1st Class (E-6) and have b...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>The movie portrayed the Coast Guard in a way t...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>3</th>\n",
|
|
|
|
+ " <td>Wow, another Kevin Costner hero movie. Postman...</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>The review highlights Kevin Costner's consiste...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </tbody>\n",
|
|
|
|
+ "</table>\n",
|
|
|
|
+ "</div>"
|
|
|
|
+ ],
|
|
|
|
+ "text/plain": [
|
|
|
|
+ " text true_label prediction \\\n",
|
|
|
|
+ "0 How many movies are there that you can think o... positive negative \n",
|
|
|
|
+ "1 I only went to see this movie because I have a... positive negative \n",
|
|
|
|
+ "2 I'm a Petty Officer 1st Class (E-6) and have b... positive negative \n",
|
|
|
|
+ "3 Wow, another Kevin Costner hero movie. Postman... negative positive \n",
|
|
|
|
+ "\n",
|
|
|
|
+ " reason \n",
|
|
|
|
+ "0 The movie borrows so much from other movies, l... \n",
|
|
|
|
+ "1 The movie missed some acting moments, scenes w... \n",
|
|
|
|
+ "2 The movie portrayed the Coast Guard in a way t... \n",
|
|
|
|
+ "3 The review highlights Kevin Costner's consiste... "
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "execution_count": 33,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "execute_result"
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Run the error analysis, print how many misclassified samples were found\n",
+ "# and display the first rows of the result.\n",
+ "error_results = analyze_with_reason()\n",
|
|
|
|
+ "print(f\"发现 {len(error_results)} 个错误案例\")\n",
|
|
|
|
+ "error_results.head()"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "id": "cbb2aba7",
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "application/vnd.jupyter.widget-view+json": {
|
|
|
|
+ "model_id": "eec3adfdc2424234b2df5e2e2d78a7b2",
|
|
|
|
+ "version_major": 2,
|
|
|
|
+ "version_minor": 0
|
|
|
|
+ },
|
|
|
|
+ "text/plain": [
|
|
|
|
+ "分析进度: 0%| | 0/21 [00:00<?, ?it/s]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "display_data"
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "name": "stdout",
|
|
|
|
+ "output_type": "stream",
|
|
|
|
+ "text": [
|
|
|
|
+ "发现 5 个错误案例\n"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "data": {
|
|
|
|
+ "text/html": [
|
|
|
|
+ "<div>\n",
|
|
|
|
+ "<style scoped>\n",
|
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
+ " vertical-align: middle;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "\n",
|
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
|
+ " vertical-align: top;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "\n",
|
|
|
|
+ " .dataframe thead th {\n",
|
|
|
|
+ " text-align: right;\n",
|
|
|
|
+ " }\n",
|
|
|
|
+ "</style>\n",
|
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
+ " <thead>\n",
|
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
|
+ " <th></th>\n",
|
|
|
|
+ " <th>text</th>\n",
|
|
|
|
+ " <th>true_label</th>\n",
|
|
|
|
+ " <th>prediction</th>\n",
|
|
|
|
+ " <th>reason</th>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </thead>\n",
|
|
|
|
+ " <tbody>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>0</th>\n",
|
|
|
|
+ " <td>How many movies are there that you can think o...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>Borrowing so much from multiple movies creates...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>1</th>\n",
|
|
|
|
+ " <td>I only went to see this movie because I have a...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>Although some aspects of the movie have positi...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>2</th>\n",
|
|
|
|
+ " <td>I'm a Petty Officer 1st Class (E-6) and have b...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>The sentiment is negative.</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>3</th>\n",
|
|
|
|
+ " <td>I've seen this story before but my kids haven'...</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>mixed</td>\n",
|
|
|
|
+ " <td>the review contains both positive and negative...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " <tr>\n",
|
|
|
|
+ " <th>4</th>\n",
|
|
|
|
+ " <td>Wow, another Kevin Costner hero movie. Postman...</td>\n",
|
|
|
|
+ " <td>negative</td>\n",
|
|
|
|
+ " <td>positive</td>\n",
|
|
|
|
+ " <td>the movie is highlighted by its relatability, ...</td>\n",
|
|
|
|
+ " </tr>\n",
|
|
|
|
+ " </tbody>\n",
|
|
|
|
+ "</table>\n",
|
|
|
|
+ "</div>"
|
|
|
|
+ ],
|
|
|
|
+ "text/plain": [
|
|
|
|
+ " text true_label prediction \\\n",
|
|
|
|
+ "0 How many movies are there that you can think o... positive negative \n",
|
|
|
|
+ "1 I only went to see this movie because I have a... positive negative \n",
|
|
|
|
+ "2 I'm a Petty Officer 1st Class (E-6) and have b... positive negative \n",
|
|
|
|
+ "3 I've seen this story before but my kids haven'... positive mixed \n",
|
|
|
|
+ "4 Wow, another Kevin Costner hero movie. Postman... negative positive \n",
|
|
|
|
+ "\n",
|
|
|
|
+ " reason \n",
|
|
|
|
+ "0 Borrowing so much from multiple movies creates... \n",
|
|
|
|
+ "1 Although some aspects of the movie have positi... \n",
|
|
|
|
+ "2 The sentiment is negative. \n",
|
|
|
|
+ "3 the review contains both positive and negative... \n",
|
|
|
|
+ "4 the movie is highlighted by its relatability, ... "
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ "execution_count": 31,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "output_type": "execute_result"
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Re-run the analysis with temperature changed from 0 to 1.9 to see how the\n",
+ "# results change.\n",
+ "# NOTE(review): this call does not pass a temperature, so the 1.9 run relies\n",
+ "# on the value used inside analyze_with_reason having been changed first.\n",
|
|
|
|
+ "error_results = analyze_with_reason()\n",
|
|
|
|
+ "print(f\"发现 {len(error_results)} 个错误案例\")\n",
|
|
|
|
+ "error_results.head()"
|
|
|
|
+ ]
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "metadata": {
|
|
|
|
+ "kernelspec": {
|
|
|
|
+ "display_name": "base",
|
|
|
|
+ "language": "python",
|
|
|
|
+ "name": "python3"
|
|
|
|
+ },
|
|
|
|
+ "language_info": {
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
+ "name": "ipython",
|
|
|
|
+ "version": 3
|
|
|
|
+ },
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
+ "name": "python",
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
+ "version": "3.13.5"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "nbformat": 4,
|
|
|
|
+ "nbformat_minor": 5
|
|
|
|
+}
|