|
@@ -1,5 +1,204 @@
|
|
|
{
|
|
|
- "cells": [],
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "4d0220be",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "from openai import OpenAI\n",
|
|
|
+ "from dotenv import load_dotenv \n",
|
|
|
+    "import json\n",
|
|
|
+    "import os\n",
|
|
|
+ "from pydantic import BaseModel\n",
|
|
|
+ "from openai import BadRequestError\n",
|
|
|
+ "\n",
|
|
|
+ "client = OpenAI(base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",\n",
|
|
|
+    "                api_key=os.getenv(\"BAILIAN_API_KEY\"))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "3ab0c10b",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "positive_comment_list = []\n",
|
|
|
+ "negative_comment_list = []\n",
|
|
|
+ "\n",
|
|
|
+ "class Emotions(BaseModel):\n",
|
|
|
+ " emotion : str\n",
|
|
|
+ " reason : str\n",
|
|
|
+ "\n",
|
|
|
+ "def positive_func(emotion : str, reason : str):\n",
|
|
|
+ " positive_emotion = Emotions(emotion = emotion, reason = reason)\n",
|
|
|
+ " positive_comment_list.append(positive_emotion)\n",
|
|
|
+ " return positive_emotion\n",
|
|
|
+ "\n",
|
|
|
+ "def negative_func(emotion : str, reason : str):\n",
|
|
|
+ " negative_emotion = Emotions(emotion = emotion, reason = reason)\n",
|
|
|
+ " negative_comment_list.append(negative_emotion)\n",
|
|
|
+ " return negative_emotion\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "tools = [\n",
|
|
|
+ " {\n",
|
|
|
+ " \"type\": \"function\",\n",
|
|
|
+ " \"function\": {\n",
|
|
|
+ " \"name\": \"positive_func\",\n",
|
|
|
+ " \"description\": \"当你判断用户输入的评论情感倾向为“Positive”时调用此函数\",\n",
|
|
|
+ " \"parameters\":{\n",
|
|
|
+ " \"emotion\":{\n",
|
|
|
+ " \"type\":\"string\",\n",
|
|
|
+ " \"description\":\"此参数为用户输入的评论情感倾向\"\n",
|
|
|
+ " },\n",
|
|
|
+ " \"reason\":{\n",
|
|
|
+ " \"type\":\"string\",\n",
|
|
|
+ " \"description\":\"此参数为判断为此情感倾向的原因\"\n",
|
|
|
+ " },\n",
|
|
|
+ " }\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ " {\n",
|
|
|
+ " \"type\": \"function\",\n",
|
|
|
+ " \"function\": {\n",
|
|
|
+ " \"name\": \"negative_func\",\n",
|
|
|
+ " \"description\": \"当你判断用户输入的评论情感倾向为“Negative”时调用此函数\",\n",
|
|
|
+ " \"parameters\":{\n",
|
|
|
+ " \"emotion\":{\n",
|
|
|
+ " \"type\":\"string\",\n",
|
|
|
+ " \"description\":\"此参数为用户输入的评论情感倾向\"\n",
|
|
|
+ " },\n",
|
|
|
+ " \"reason\":{\n",
|
|
|
+ " \"type\":\"string\",\n",
|
|
|
+ " \"description\":\"此参数为判断为此情感倾向的原因\"\n",
|
|
|
+ " },\n",
|
|
|
+ " }\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ "]\n",
|
|
|
+ "\n",
|
|
|
+ "\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "65268481",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def chat_with_functions(content: str) -> Emotions:\n",
|
|
|
+ " try:\n",
|
|
|
+ " completion = client.chat.completions.create(\n",
|
|
|
+ " model=\"qwen3-4b\",\n",
|
|
|
+ " messages=[\n",
|
|
|
+ " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Determine whether the sentiment entered by the user is positive or negative.\"},\n",
|
|
|
+ " {\"role\": \"user\", \"content\": content},\n",
|
|
|
+ " ],\n",
|
|
|
+ " tools=tools,\n",
|
|
|
+ " tool_choice=\"auto\",\n",
|
|
|
+ " extra_body={\"enable_thinking\": False},\n",
|
|
|
+ " )\n",
|
|
|
+ " response_json = json.loads(completion.model_dump_json())\n",
|
|
|
+ " \n",
|
|
|
+ " if response_json['choices'] and 'tool_calls' in response_json['choices'][0][\"finish_reason\"]:\n",
|
|
|
+ " params = json.loads(response_json['choices'][0][\"message\"][\"tool_calls\"][0][\"function\"][\"arguments\"])\n",
|
|
|
+ " return Emotions(emotion=params[\"emotion\"], reason=params[\"reason\"])\n",
|
|
|
+ " else: \n",
|
|
|
+ " print(\"没用工具\")\n",
|
|
|
+ " return Emotions(emotion=\"Unknown\", reason=\"No reason provided.\")\n",
|
|
|
+ " except BadRequestError:\n",
|
|
|
+ " print(\"文本内容不当:::::>\" + content)\n",
|
|
|
+ " return Emotions(emotion=\"Error\", reason=\"文本内容不当.\")\n",
|
|
|
+ " except Exception as e:\n",
|
|
|
+ " print(f\"Unexpected error: {e}\")\n",
|
|
|
+ " return Emotions(emotion=\"Error\", reason=\"未知错误\")\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "6e04870e",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import glob\n",
|
|
|
+ "import os\n",
|
|
|
+ "def process_files(file_list, expected_label, res_list, fail_txt_list):\n",
|
|
|
+ " correct_count = 0\n",
|
|
|
+ " for file_path in file_list:\n",
|
|
|
+ " print(f\"找到文件: {file_path}\")\n",
|
|
|
+ " with open(file_path, 'r', encoding='utf-8') as f:\n",
|
|
|
+ " content = f.read()\n",
|
|
|
+ " res = chat_with_functions(content)\n",
|
|
|
+ " res_list.append(res)\n",
|
|
|
+ " print(res.emotion)\n",
|
|
|
+ " if expected_label.lower() in res.emotion.lower():\n",
|
|
|
+ " correct_count += 1\n",
|
|
|
+ " else:\n",
|
|
|
+ " fail_txt_list.append(f\"预测: {res.emotion}\\n原文:\\n{content}\\n原因:\\n{res.reason}\\n\")\n",
|
|
|
+ " return correct_count\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "base_dir = 'C:\\\\Users\\\\28191\\\\Desktop\\\\xuexi_py\\\\xuexi_git\\\\ai_learning\\\\data\\\\acllmdb_sentiment_small'\n",
|
|
|
+ "positive_txt_files = glob.glob(os.path.join(base_dir, 'positive', '*.txt'), recursive=True)\n",
|
|
|
+ "negative_txt_files = glob.glob(os.path.join(base_dir, 'negative', '*.txt'), recursive=True)\n",
|
|
|
+ "\n",
|
|
|
+ "res_list = []\n",
|
|
|
+ "fail_txt_list = []\n",
|
|
|
+ "total_count = len(positive_txt_files) + len(negative_txt_files)\n",
|
|
|
+ "\n",
|
|
|
+ "correct_positive = process_files(positive_txt_files, 'positive', res_list, fail_txt_list)\n",
|
|
|
+ "correct_negative = process_files(negative_txt_files, 'negative', res_list, fail_txt_list)\n",
|
|
|
+ "correct_total = correct_positive + correct_negative\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "6cdf6ddd",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "\n",
|
|
|
+ "accuracy =correct_total/total_count\n",
|
|
|
+ "print(f\"count::> {total_count} ac:::> {correct_total} accuracy:::> {accuracy}\")\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "0537fa27",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "\n",
|
|
|
+ "fail_data = []\n",
|
|
|
+ "for item in fail_txt_list:\n",
|
|
|
+ " try:\n",
|
|
|
+ " parts = item.split('\\n')\n",
|
|
|
+ " predicted = parts[0].replace(\"预测: \", \"\").strip()\n",
|
|
|
+ " reason_index = parts.index('原因:') \n",
|
|
|
+ " original_text = '\\n'.join(parts[2:reason_index]) \n",
|
|
|
+ " reason = '\\n'.join(parts[reason_index+1:]) \n",
|
|
|
+ " fail_data.append({\n",
|
|
|
+ " 'predicted_emotion': predicted,\n",
|
|
|
+ " 'original_text': original_text,\n",
|
|
|
+ " 'reason': reason\n",
|
|
|
+ " })\n",
|
|
|
+ " except Exception as e:\n",
|
|
|
+ " print(f\"解析失败: {e}\")\n",
|
|
|
+ " continue\n",
|
|
|
+ "\n",
|
|
|
+ "df = pd.DataFrame(fail_data)\n",
|
|
|
+ "\n",
|
|
|
+ "df.to_csv('failed_predictions.csv', index=False, encoding='utf-8-sig')"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"metadata": {
|
|
|
"kernelspec": {
|
|
|
"display_name": "qwen-env",
|