{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "691a012b",
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"from dotenv import load_dotenv \n",
"import glob\n",
"import json\n",
"import os\n",
"import time\n",
"from pydantic import BaseModel\n",
"from openai import BadRequestError\n",
"import pandas as pd\n",
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
"from tqdm import tqdm\n",
"\n",
"# 加载环境变量\n",
"load_dotenv() \n",
"\n",
"# 创建OpenAI客户端\n",
"client = OpenAI(base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
" api_key=os.getenv(\"BAILIAN_API_KEY\"))\n",
"DATA_ROOT = \"D:/ai_learning-master/ai_learning/data/acllmdb_sentiment_small\"\n",
"SAVE_PATH = \"./prediction_results.csv\"\n",
"\n",
"class SentimentRequest(BaseModel):\n",
" sentiment: str\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe312896",
"metadata": {},
"outputs": [],
"source": [
"def load_dataset():\n",
" \"\"\"加载数据集并返回带标签的样本列表\"\"\"\n",
" samples = []\n",
" for label in [\"positive\", \"negative\"]:\n",
" dir_path = os.path.join(DATA_ROOT, label)\n",
" for file_path in glob.glob(os.path.join(dir_path, \"*.txt\")):\n",
" with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
" samples.append({\n",
" \"text\": f.read(),\n",
" \"true_label\": label\n",
" })\n",
" return samples"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0cfb9303",
"metadata": {},
"outputs": [],
"source": [
"def predict_text(client, text, max_retries=3):\n",
" \"\"\"纯文本模式预测\"\"\"\n",
" prompt = f\"\"\"请判断以下影评的情感倾向,严格只输出英文单词positive或negative:\n",
"{text}\"\"\"\n",
" \n",
" for _ in range(max_retries):\n",
" try:\n",
" response = client.chat.completions.create(\n",
" model=\"qwen3-4b\",\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" temperature=0,\n",
" extra_body={\"enable_thinking\": False}\n",
" )\n",
" \n",
" return response.choices[0].message.content.strip().lower()\n",
" except BadRequestError:\n",
" time.sleep(1)\n",
" return \"error\"\n",
"\n",
"def predict_json(client, text, max_retries=3):\n",
" \"\"\"JSON模式预测\"\"\"\n",
" prompt = f\"\"\"分析以下影评的情感倾向,使用JSON格式返回结果:\n",
"{text}\"\"\"\n",
" \n",
" for _ in range(max_retries):\n",
" try:\n",
" response = client.chat.completions.create(\n",
" model=\"qwen3-4b\",\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" response_format={\"type\": \"json_object\"},\n",
" temperature=0,\n",
" extra_body={\"enable_thinking\": False}\n",
" )\n",
" result = json.loads(response.choices[0].message.content)\n",
" return SentimentRequest(**result).sentiment\n",
" except:\n",
" time.sleep(1)\n",
" return \"error\"\n",
"\n",
"def predict_tool(client, text, max_retries=3):\n",
" \"\"\"工具调用模式预测\"\"\"\n",
" prompt = f\"\"\"请分析以下影评的情感倾向:\n",
"{text}\"\"\"\n",
" \n",
" tools = [{\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"sentiment_analysis\",\n",
" \"description\": \"情感分析结果\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"sentiment\": {\n",
" \"type\": \"string\", \n",
" \"enum\": [\"positive\", \"negative\"],\n",
" \"description\": \"情感倾向\"\n",
" }\n",
" },\n",
" \"required\": [\"sentiment\"]\n",
" }\n",
" }\n",
" }]\n",
" \n",
" for _ in range(max_retries):\n",
" try:\n",
" response = client.chat.completions.create(\n",
" model=\"qwen3-4b\",\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" tools=tools,\n",
" tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
" temperature=0,\n",
" extra_body={\"enable_thinking\": False}\n",
" )\n",
" args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
" return SentimentRequest(**args).sentiment\n",
" except:\n",
" time.sleep(1)\n",
" return \"error\"\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d0fa114e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功加载 21 条样本\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"预测进度: 100%|██████████| 21/21 [00:57<00:00, 2.72s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"预测结果已保存至:./prediction_results.csv\n",
"text模式准确率:90.48%\n",
"json模式准确率:90.48%\n",
"tool模式准确率:85.71%\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"def main():\n",
" # 初始化客户端\n",
" client = OpenAI(\n",
" base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
" api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
" )\n",
" \n",
" # 加载数据\n",
" samples = load_dataset()\n",
" print(f\"成功加载 {len(samples)} 条样本\")\n",
" \n",
" # 执行预测\n",
" results = []\n",
" for sample in tqdm(samples, desc=\"预测进度\"):\n",
" try:\n",
" pred_text = predict_text(client, sample[\"text\"])\n",
" pred_json = predict_json(client, sample[\"text\"])\n",
" pred_tool = predict_tool(client, sample[\"text\"])\n",
" \n",
" results.append({\n",
" \"text\": sample[\"text\"],\n",
" \"true_label\": sample[\"true_label\"],\n",
" \"text_pred\": pred_text,\n",
" \"json_pred\": pred_json,\n",
" \"tool_pred\": pred_tool\n",
" })\n",
" except Exception as e:\n",
" print(f\"预测失败:{str(e)}\")\n",
" \n",
" # 保存结果\n",
" df = pd.DataFrame(results)\n",
" df.to_csv(SAVE_PATH, index=False)\n",
" print(f\"预测结果已保存至:{SAVE_PATH}\")\n",
" \n",
" # 计算指标\n",
" for mode in [\"text_pred\",\"json_pred\",\"tool_pred\"]:\n",
" y_true = df[\"true_label\"]\n",
" y_pred = df[mode]\n",
" accuracy = accuracy_score(y_true, y_pred)\n",
" print(f\"{mode.split('_')[0]}模式准确率:{accuracy:.2%}\")\n",
"if __name__ == \"__main__\":\n",
" main()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6048b3a",
"metadata": {},
"outputs": [],
"source": [
"# 新单元格1:定义独立函数\n",
"from tqdm.notebook import tqdm\n",
"from openai import OpenAI\n",
"import pandas as pd\n",
"\n",
"def compare_models():\n",
" \"\"\"独立模型比较函数\"\"\"\n",
" # 初始化独立客户端\n",
" local_client = OpenAI(\n",
" base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
" api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
" )\n",
" \n",
" # 独立模型列表\n",
" MODEL_COMPARE = [\n",
" \"qwen3-32b\",\n",
" \"qwen3-30b-a3b\",\n",
" \"qwen3-0.6b\"\n",
" ]\n",
" \n",
" def local_predict(text, model_name, max_retries=3):\n",
" \"\"\"独立预测函数\"\"\"\n",
" prompt = f\"判断情感倾向(仅输出positive/negative):\\n{text}\"\n",
" for _ in range(max_retries):\n",
" try:\n",
" response = local_client.chat.completions.create(\n",
" model=model_name,\n",
" messages=[{\"role\": \"user\", \"content\": prompt}],\n",
" temperature=0,\n",
" extra_body={\"enable_thinking\": False}\n",
" )\n",
" return response.choices[0].message.content.strip().lower()\n",
" except Exception:\n",
" time.sleep(1)\n",
" return \"error\"\n",
" \n",
" # 独立数据加载\n",
" local_samples = load_dataset() # 复用已有加载函数\n",
" \n",
" # 执行比较\n",
" report = []\n",
" for model in MODEL_COMPARE:\n",
" correct = 0\n",
" for sample in tqdm(local_samples, desc=f\"Testing {model}\"):\n",
" pred = local_predict(sample[\"text\"], model)\n",
" if pred == sample[\"true_label\"]:\n",
" correct += 1\n",
" report.append({\n",
" \"Model\": model,\n",
" \"Accuracy\": f\"{correct/len(local_samples):.2%}\",\n",
" \"Params\": model.split(\"-\")[1].upper()\n",
" })\n",
" \n",
" # 显示结果\n",
" return pd.DataFrame(report)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "6e69bbb2",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8f226078caad47a89f695eb7180f2e27",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Testing qwen3-32b: 0%| | 0/21 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "729126a4fed14b558c36947f65d8d6c0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Testing qwen3-30b-a3b: 0%| | 0/21 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b263d76ce04749b19e8d918b528a5fdb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Testing qwen3-0.6b: 0%| | 0/21 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" Model | \n",
" Accuracy | \n",
" Params | \n",
"
\n",
" \n",
" \n",
" \n",
" qwen3-32b | \n",
" 90.48% | \n",
" 32B | \n",
"
\n",
" \n",
" qwen3-30b-a3b | \n",
" 85.71% | \n",
" 30B | \n",
"
\n",
" \n",
" qwen3-0.6b | \n",
" 47.62% | \n",
" 0.6B | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_df = compare_models()\n",
"result_df.style.hide(axis=\"index\").format(precision=2)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "e772335c",
"metadata": {},
"outputs": [],
"source": [
"def analyze_with_reason():\n",
" \"\"\"使用qwen3-0.6b进行带原因的情感分析\"\"\"\n",
" client = OpenAI(\n",
" base_url=os.getenv(\"BAILIAN_API_BASE_URL\"),\n",
" api_key=os.getenv(\"BAILIAN_API_KEY\")\n",
" )\n",
" \n",
" # 自定义工具定义\n",
" tools = [{\n",
" \"type\": \"function\",\n",
" \"function\": {\n",
" \"name\": \"sentiment_analysis\",\n",
" \"description\": \"情感分析结果及原因\",\n",
" \"parameters\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"sentiment\": {\n",
" \"type\": \"string\", \n",
" \"enum\": [\"positive\", \"negative\"],\n",
" \"description\": \"情感倾向\"\n",
" },\n",
" \"reason\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"分析原因\"\n",
" }\n",
" },\n",
" \"required\": [\"sentiment\", \"reason\"]\n",
" }\n",
" }\n",
" }]\n",
" \n",
" # 加载数据\n",
" samples = load_dataset()\n",
" error_cases = []\n",
" \n",
" for sample in tqdm(samples, desc=\"分析进度\"):\n",
" try:\n",
" # 工具调用预测\n",
" response = client.chat.completions.create(\n",
" model=\"qwen3-0.6b\",\n",
" messages=[{\n",
" \"role\": \"user\", \n",
" \"content\": f\"请分析以下影评情感并说明原因:\\n{sample['text']}\"\n",
" }],\n",
" tools=tools,\n",
" tool_choice={\"type\": \"function\", \"function\": {\"name\": \"sentiment_analysis\"}},\n",
" temperature=0,\n",
" extra_body={\"enable_thinking\": False}\n",
" )\n",
" \n",
" # 解析结果\n",
" args = json.loads(response.choices[0].message.tool_calls[0].function.arguments)\n",
" pred = args[\"sentiment\"]\n",
" reason = args[\"reason\"]\n",
" \n",
" # 记录错误案例\n",
" if pred != sample[\"true_label\"]:\n",
" error_cases.append({\n",
" \"text\": sample[\"text\"],\n",
" \"true_label\": sample[\"true_label\"],\n",
" \"prediction\": pred,\n",
" \"reason\": reason\n",
" })\n",
" \n",
" except Exception as e:\n",
" print(f\"分析失败:{str(e)}\")\n",
" continue\n",
" \n",
" # 保存错误案例\n",
" error_df = pd.DataFrame(error_cases)\n",
" error_df.to_csv(\"./error_analysis.csv\", index=False)\n",
" return error_df\n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "e4fd9997",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "51954bc8a0624b2c8e0e0100edae6fc3",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"分析进度: 0%| | 0/21 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"分析失败:'sentiment'\n",
"发现 4 个错误案例\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" text | \n",
" true_label | \n",
" prediction | \n",
" reason | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" How many movies are there that you can think o... | \n",
" positive | \n",
" negative | \n",
" The movie borrows so much from other movies, l... | \n",
"
\n",
" \n",
" 1 | \n",
" I only went to see this movie because I have a... | \n",
" positive | \n",
" negative | \n",
" The movie missed some acting moments, scenes w... | \n",
"
\n",
" \n",
" 2 | \n",
" I'm a Petty Officer 1st Class (E-6) and have b... | \n",
" positive | \n",
" negative | \n",
" The movie portrayed the Coast Guard in a way t... | \n",
"
\n",
" \n",
" 3 | \n",
" Wow, another Kevin Costner hero movie. Postman... | \n",
" negative | \n",
" positive | \n",
" The review highlights Kevin Costner's consiste... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" text true_label prediction \\\n",
"0 How many movies are there that you can think o... positive negative \n",
"1 I only went to see this movie because I have a... positive negative \n",
"2 I'm a Petty Officer 1st Class (E-6) and have b... positive negative \n",
"3 Wow, another Kevin Costner hero movie. Postman... negative positive \n",
"\n",
" reason \n",
"0 The movie borrows so much from other movies, l... \n",
"1 The movie missed some acting moments, scenes w... \n",
"2 The movie portrayed the Coast Guard in a way t... \n",
"3 The review highlights Kevin Costner's consiste... "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"error_results = analyze_with_reason()\n",
"print(f\"发现 {len(error_results)} 个错误案例\")\n",
"error_results.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cbb2aba7",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "eec3adfdc2424234b2df5e2e2d78a7b2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"分析进度: 0%| | 0/21 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"发现 5 个错误案例\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" text | \n",
" true_label | \n",
" prediction | \n",
" reason | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" How many movies are there that you can think o... | \n",
" positive | \n",
" negative | \n",
" Borrowing so much from multiple movies creates... | \n",
"
\n",
" \n",
" 1 | \n",
" I only went to see this movie because I have a... | \n",
" positive | \n",
" negative | \n",
" Although some aspects of the movie have positi... | \n",
"
\n",
" \n",
" 2 | \n",
" I'm a Petty Officer 1st Class (E-6) and have b... | \n",
" positive | \n",
" negative | \n",
" The sentiment is negative. | \n",
"
\n",
" \n",
" 3 | \n",
" I've seen this story before but my kids haven'... | \n",
" positive | \n",
" mixed | \n",
" the review contains both positive and negative... | \n",
"
\n",
" \n",
" 4 | \n",
" Wow, another Kevin Costner hero movie. Postman... | \n",
" negative | \n",
" positive | \n",
" the movie is highlighted by its relatability, ... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" text true_label prediction \\\n",
"0 How many movies are there that you can think o... positive negative \n",
"1 I only went to see this movie because I have a... positive negative \n",
"2 I'm a Petty Officer 1st Class (E-6) and have b... positive negative \n",
"3 I've seen this story before but my kids haven'... positive mixed \n",
"4 Wow, another Kevin Costner hero movie. Postman... negative positive \n",
"\n",
" reason \n",
"0 Borrowing so much from multiple movies creates... \n",
"1 Although some aspects of the movie have positi... \n",
"2 The sentiment is negative. \n",
"3 the review contains both positive and negative... \n",
"4 the movie is highlighted by its relatability, ... "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#修改temperature查看结果,从0修改至1.9\n",
"error_results = analyze_with_reason()\n",
"print(f\"发现 {len(error_results)} 个错误案例\")\n",
"error_results.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}