hjh преди 1 месец
родител
ревизия
57fc80f09d
променени са 2 файла, в които са добавени 301 реда и са изтрити 42 реда
  1. 200 1
      黄靖淏/t1/code/function_calling.ipynb
  2. 101 41
      黄靖淏/t1/code/structured_output_test.ipynb

+ 200 - 1
黄靖淏/t1/code/function_calling.ipynb

@@ -1,5 +1,204 @@
 {
- "cells": [],
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4d0220be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from openai import OpenAI\n",
+    "from dotenv import load_dotenv \n",
+    "import json\n",
+    "from pydantic import BaseModel\n",
+    "from openai import BadRequestError\n",
+    "import os\n",
+    "client = OpenAI(base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",\n",
+    "       api_key=os.getenv(\"BAILIAN_API_KEY\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ab0c10b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "positive_comment_list = []\n",
+    "negative_comment_list = []\n",
+    "\n",
+    "class Emotions(BaseModel):\n",
+    "    emotion : str\n",
+    "    reason : str\n",
+    "\n",
+    "def positive_func(emotion : str, reason : str):\n",
+    "    positive_emotion =  Emotions(emotion = emotion, reason = reason)\n",
+    "    positive_comment_list.append(positive_emotion)\n",
+    "    return positive_emotion\n",
+    "\n",
+    "def negative_func(emotion : str, reason : str):\n",
+    "    negative_emotion =  Emotions(emotion = emotion, reason = reason)\n",
+    "    negative_comment_list.append(negative_emotion)\n",
+    "    return negative_emotion\n",
+    "\n",
+    "\n",
+    "tools = [\n",
+    "    {\n",
+    "        \"type\": \"function\",\n",
+    "        \"function\": {\n",
+    "            \"name\": \"positive_func\",\n",
+    "            \"description\": \"当你判断用户输入的评论情感倾向为“Positive”时调用此函数\",\n",
+    "            \"parameters\":{\n",
+    "                \"emotion\":{\n",
+    "                    \"type\":\"string\",\n",
+    "                    \"description\":\"此参数为用户输入的评论情感倾向\"\n",
+    "                    },\n",
+    "                \"reason\":{\n",
+    "                    \"type\":\"string\",\n",
+    "                    \"description\":\"此参数为判断为此情感倾向的原因\"\n",
+    "                    },\n",
+    "            }\n",
+    "        }\n",
+    "    },\n",
+    "        {\n",
+    "        \"type\": \"function\",\n",
+    "        \"function\": {\n",
+    "            \"name\": \"negative_func\",\n",
+    "            \"description\": \"当你判断用户输入的评论情感倾向为“Negative”时调用此函数\",\n",
+    "            \"parameters\":{\n",
+    "                \"emotion\":{\n",
+    "                    \"type\":\"string\",\n",
+    "                    \"description\":\"此参数为用户输入的评论情感倾向\"\n",
+    "                    },\n",
+    "                \"reason\":{\n",
+    "                    \"type\":\"string\",\n",
+    "                    \"description\":\"此参数为判断为此情感倾向的原因\"\n",
+    "                    },\n",
+    "            }\n",
+    "        }\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65268481",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def chat_with_functions(content: str) -> Emotions:\n",
+    "    try:\n",
+    "        completion = client.chat.completions.create(\n",
+    "            model=\"qwen3-4b\",\n",
+    "            messages=[\n",
+    "                {\"role\": \"system\", \"content\": \"You are a helpful assistant. Determine whether the sentiment entered by the user is positive or negative.\"},\n",
+    "                {\"role\": \"user\", \"content\": content},\n",
+    "            ],\n",
+    "            tools=tools,\n",
+    "            tool_choice=\"auto\",\n",
+    "            extra_body={\"enable_thinking\": False},\n",
+    "        )\n",
+    "        response_json = json.loads(completion.model_dump_json())\n",
+    "        \n",
+    "        if response_json['choices'] and 'tool_calls' in response_json['choices'][0][\"finish_reason\"]:\n",
+    "            params = json.loads(response_json['choices'][0][\"message\"][\"tool_calls\"][0][\"function\"][\"arguments\"])\n",
+    "            return Emotions(emotion=params[\"emotion\"], reason=params[\"reason\"])\n",
+    "        else: \n",
+    "            print(\"没用工具\")\n",
+    "            return Emotions(emotion=\"Unknown\", reason=\"No reason provided.\")\n",
+    "    except BadRequestError:\n",
+    "        print(\"文本内容不当:::::>\" + content)\n",
+    "        return Emotions(emotion=\"Error\", reason=\"文本内容不当.\")\n",
+    "    except Exception as e:\n",
+    "        print(f\"Unexpected error: {e}\")\n",
+    "        return Emotions(emotion=\"Error\", reason=\"未知错误\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e04870e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import glob\n",
+    "import os\n",
+    "def process_files(file_list, expected_label, res_list, fail_txt_list):\n",
+    "    correct_count = 0\n",
+    "    for file_path in file_list:\n",
+    "        print(f\"找到文件: {file_path}\")\n",
+    "        with open(file_path, 'r', encoding='utf-8') as f:\n",
+    "            content = f.read()\n",
+    "            res = chat_with_functions(content)\n",
+    "            res_list.append(res)\n",
+    "            print(res.emotion)\n",
+    "            if expected_label.lower() in res.emotion.lower():\n",
+    "                correct_count += 1\n",
+    "            else:\n",
+    "                fail_txt_list.append(f\"预测: {res.emotion}\\n原文:\\n{content}\\n原因:\\n{res.reason}\\n\")\n",
+    "    return correct_count\n",
+    "\n",
+    "\n",
+    "base_dir = 'C:\\\\Users\\\\28191\\\\Desktop\\\\xuexi_py\\\\xuexi_git\\\\ai_learning\\\\data\\\\acllmdb_sentiment_small'\n",
+    "positive_txt_files = glob.glob(os.path.join(base_dir, 'positive', '*.txt'), recursive=True)\n",
+    "negative_txt_files = glob.glob(os.path.join(base_dir, 'negative', '*.txt'), recursive=True)\n",
+    "\n",
+    "res_list = []\n",
+    "fail_txt_list = []\n",
+    "total_count = len(positive_txt_files) + len(negative_txt_files)\n",
+    "\n",
+    "correct_positive = process_files(positive_txt_files, 'positive', res_list, fail_txt_list)\n",
+    "correct_negative = process_files(negative_txt_files, 'negative', res_list, fail_txt_list)\n",
+    "correct_total = correct_positive + correct_negative\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cdf6ddd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "accuracy =correct_total/total_count\n",
+    "print(f\"count::> {total_count}   ac:::> {correct_total}   accuracy:::> {accuracy}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0537fa27",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "fail_data = []\n",
+    "for item in fail_txt_list:\n",
+    "    try:\n",
+    "        parts = item.split('\\n')\n",
+    "        predicted = parts[0].replace(\"预测: \", \"\").strip()\n",
+    "        reason_index = parts.index('原因:') \n",
+    "        original_text = '\\n'.join(parts[2:reason_index])  \n",
+    "        reason = '\\n'.join(parts[reason_index+1:])  \n",
+    "        fail_data.append({\n",
+    "            'predicted_emotion': predicted,\n",
+    "            'original_text': original_text,\n",
+    "            'reason': reason\n",
+    "        })\n",
+    "    except Exception as e:\n",
+    "        print(f\"解析失败: {e}\")\n",
+    "        continue\n",
+    "\n",
+    "df = pd.DataFrame(fail_data)\n",
+    "\n",
+    "df.to_csv('failed_predictions.csv', index=False, encoding='utf-8-sig')"
+   ]
+  }
+ ],
  "metadata": {
   "kernelspec": {
    "display_name": "qwen-env",

+ 101 - 41
黄靖淏/t1/code/structured_output_test.ipynb

@@ -13,21 +13,71 @@
     "import glob\n",
     "from pydantic import BaseModel\n",
     "from openai import BadRequestError\n",
-    "import os\n",
+    "import os\n",
     "client = OpenAI(base_url=\"https://dashscope.aliyuncs.com/compatible-mode/v1\",\n",
-    "       api_key=os.getenv(\"BAILIAN_API_KEY\"))\n"
+    "       api_key=os.getenv(\"BAILIAN_API_KEY\"))\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "20083209",
    "metadata": {},
    "outputs": [],
    "source": [
-    "class CalendarEvent(BaseModel):\n",
-    "    isNegative: bool\n",
-    "    reason: str"
+    "\n",
+    "response_json = json.dumps(\n",
+    "    {\n",
+    "        \"emotion\": \"Positive/Negative\",\n",
+    "        \"reason\": \"Singing\"\n",
+    "    },\n",
+    "    ensure_ascii=False\n",
+    ")\n",
+    "\n",
+    "def chatFunc(contant: str):\n",
+    "    completion = client.chat.completions.create(\n",
+    "    # 模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models\n",
+    "    model=\"qwen3-4b\",\n",
+    "    messages=[\n",
+    "        # {\"role\": \"system\", \"content\": \"You are a helpful assistant.Determine whether the sentiment of the user's input is positive or negative.Note that only positive or negative values are output.{response_json}\"},\n",
+    "        {\"role\": \"system\", \"content\": f\"You are a helpful assistant. Determine whether the sentiment entered by the user is positive or negative. Note that only positive or negative cases are output.Avoid being ambiguous pleases.Respond in JSON format below:{response_json}\"},\n",
+    "        {\"role\": \"user\", \"content\": contant},\n",
+    "    ],\n",
+    "    response_format ={\"type\": \"json_object\"},\n",
+    "    extra_body={\"enable_thinking\": False},\n",
+    "    )\n",
+    "    \n",
+    "    json_str = completion.choices[0].message.content\n",
+    "    print(json_str)\n",
+    "    data = json.loads(json_str)\n",
+    "    real_content = data['emotion']\n",
+    "    print(real_content)\n",
+    "    return real_content\n",
+    "\n",
+    "def chatFunc(contant: str):\n",
+    "    try:\n",
+    "        completion = client.chat.completions.create(\n",
+    "        # 模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models\n",
+    "        model=\"qwen3-4b\",\n",
+    "        messages=[\n",
+    "            # {\"role\": \"system\", \"content\": \"You are a helpful assistant.Determine whether the sentiment of the user's input is positive or negative.Note that only positive or negative values are output.{response_json}\"},\n",
+    "            {\"role\": \"system\", \"content\": f\"You are a helpful assistant. Determine whether the sentiment entered by the user is positive or negative. Note that only positive or negative cases are output.Avoid being ambiguous pleases.Respond in JSON format below:{response_json}\"},\n",
+    "            {\"role\": \"user\", \"content\": contant},\n",
+    "        ],\n",
+    "        response_format ={\"type\": \"json_object\"},\n",
+    "        extra_body={\"enable_thinking\": False},\n",
+    "        )\n",
+    "        \n",
+    "        json_str = completion.choices[0].message.content\n",
+    "        print(json_str)\n",
+    "        data = json.loads(json_str)\n",
+    "        real_content = data['emotion']\n",
+    "        print(real_content)\n",
+    "        return real_content\n",
+    "    except BadRequestError:\n",
+    "        print(\"文本内容不当:::::>\"+contant)\n",
+    "        return \"Contant Error\"\n",
+    "    "
    ]
   },
   {
@@ -37,30 +87,43 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def chatFunc(contant: str):\n",
-    "    try:\n",
-    "        response = client.responses.parse(\n",
-    "        model=\"qwen-max\",\n",
-    "        input=[\n",
-    "            # {\"role\": \"system\", \"content\": \"You are a helpful assistant.Determine whether the sentiment of the user's input is positive or negative.Note that only positive or negative values are output\"},\n",
-    "            {\"role\": \"system\", \"content\": \"You are a helpful assistant. Determine whether the sentiment entered by the user is positive or negative. Note that only positive or negative cases are output.Avoid being ambiguous pleases.\"},\n",
-    "            {\"role\": \"user\", \"content\": contant},\n",
-    "        ],\n",
-    "        # extra_body={\"enable_thinking\": False},\n",
-    "        text_format=CalendarEvent,\n",
-    "        )\n",
-    "        struct_str = response.output_parsed\n",
-    "        print(struct_str)\n",
-    "        return struct_str\n",
+    "negative_txt_files = glob.glob('../../data/acllmdb_sentiment_small/negative/*.txt', recursive=True)\n",
+    "positive_txt_files = glob.glob('C:\\\\Users\\\\28191\\\\Desktop\\\\xuexi_py\\\\xuexi_git\\\\ai_learning\\\\data\\\\acllmdb_sentiment_small\\\\positive\\\\*.txt', recursive=True)\n",
     "\n",
-    "    except BadRequestError:\n",
-    "        print(\"文本内容不当:::::>\"+contant)\n",
-    "        return None"
+    "res_list = []\n",
+    "fail_txt_list = []\n",
+    "count = 0.0\n",
+    "ca = 0.0\n",
+    "\n",
+    "for index, file_path in enumerate(negative_txt_files, start=0):\n",
+    "    print(f\"找到文件: {file_path}\")\n",
+    "    with open(file_path, 'r', encoding='utf-8') as f:\n",
+    "        count+=1\n",
+    "        content = f.read()  # 读取所有内容\n",
+    "        res = chatFunc(content)\n",
+    "        if 'negative' in res or 'Negative' in res:  \n",
+    "            ca+=1\n",
+    "        else:\n",
+    "            fail_txt_list.append(res+content)\n",
+    "        res_list.append(res)\n",
+    "\n",
+    "for index, file_path in enumerate(positive_txt_files, start=0):\n",
+    "    print(f\"找到文件: {file_path}\")\n",
+    "    with open(file_path, 'r', encoding='utf-8') as f:\n",
+    "        count+=1\n",
+    "        content = f.read()  # 读取所有内容\n",
+    "        res = chatFunc(content)\n",
+    "        if 'positive' in res or 'Positive' in res:  \n",
+    "            ca+=1\n",
+    "        else:\n",
+    "            fail_txt_list.append(res+content)\n",
+    "        res_list.append(res)\n",
+    "\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "bf32f988",
    "metadata": {},
    "outputs": [
@@ -88,22 +151,19 @@
     }
    ],
    "source": [
-    "negative_txt_files = glob.glob('../../data/acllmdb_sentiment_small/negative/*.txt', recursive=True)\n",
-    "res_list = []\n",
-    "fail_txt_list = []\n",
-    "count = 0.0\n",
-    "ca = 0.0\n",
-    "for index, file_path in enumerate(negative_txt_files, start=0):\n",
-    "    print(f\"找到文件: {file_path}\")\n",
-    "    count+=1\n",
-    "    with open(file_path, 'r', encoding='utf-8') as f:\n",
-    "        content = f.read()  # 读取所有内容\n",
-    "        res = chatFunc(content)\n",
-    "        # if 'negative' in res or 'Negative' in res:  \n",
-    "        #     ca+=1\n",
-    "        # else:\n",
-    "        #     fail_txt_list.append(res+content)\n",
-    "        res_list.append(res)"
+    "    \n",
+    "for fail_txt in fail_txt_list:\n",
+    "    print(fail_txt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "efcd8a81",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"count::>\" + str(count) + \"  ac:::>\" + str(ca) + \"   accuracy:::>\" + str(ca/count))\n"
    ]
   }
  ],