Ver Fonte

init commit

JiaQiang há 2 semanas atrás
commit
3f7ab28a3b
100 ficheiros alterados com 3761 adições e 0 exclusões
  1. 3 0
      .idea/.gitignore
  2. 6 0
      .idea/inspectionProfiles/Project_Default.xml
  3. 6 0
      .idea/inspectionProfiles/profiles_settings.xml
  4. 4 0
      .idea/misc.xml
  5. 9 0
      .idea/modules.xml
  6. 9 0
      .idea/tx_flow_analysis.iml
  7. BIN
      __pycache__/config.cpython-310.pyc
  8. 141 0
      config.py
  9. 0 0
      llmops/__init__.py
  10. 0 0
      llmops/agents/__init__.py
  11. 0 0
      llmops/agents/datadev/__init__.py
  12. 0 0
      llmops/agents/datadev/lineage/__init__.py
  13. 1298 0
      llmops/agents/datadev/lineage/sql_lineage_agent_xmgj.py
  14. 56 0
      llmops/agents/datadev/llm.py
  15. 23 0
      llmops/agents/datadev/memory/LimitedConversationBufferMemory.py
  16. 0 0
      llmops/agents/datadev/memory/__init__.py
  17. 139 0
      llmops/agents/datadev/memory/memory_saver_with_expiry.py
  18. 137 0
      llmops/agents/datadev/memory/memory_saver_with_expiry2.py
  19. 0 0
      llmops/agents/datadev/tools/__init__.py
  20. 22 0
      llmops/agents/datadev/tools/del_substr_tool.py
  21. 21 0
      llmops/agents/datadev/tools/gen_query_sql_tool.py
  22. 56 0
      llmops/agents/datadev/tools/get_database_schema_tool.py
  23. 163 0
      llmops/agents/datadev/tools/logger_decorator.py
  24. 14 0
      llmops/agents/datadev/tools/timeit.py
  25. 1 0
      venv/.Python
  26. 247 0
      venv/bin/Activate.ps1
  27. 69 0
      venv/bin/activate
  28. 26 0
      venv/bin/activate.csh
  29. 66 0
      venv/bin/activate.fish
  30. 8 0
      venv/bin/distro
  31. 8 0
      venv/bin/httpx
  32. 41 0
      venv/bin/jsondiff
  33. 107 0
      venv/bin/jsonpatch
  34. 67 0
      venv/bin/jsonpointer
  35. 8 0
      venv/bin/normalizer
  36. 8 0
      venv/bin/openai
  37. 8 0
      venv/bin/pip
  38. 8 0
      venv/bin/pip3
  39. 8 0
      venv/bin/pip3.10
  40. 1 0
      venv/bin/python
  41. 1 0
      venv/bin/python3
  42. BIN
      venv/bin/python3.10
  43. 8 0
      venv/bin/tqdm
  44. 1 0
      venv/include/python3.10
  45. 1 0
      venv/lib/python3.10/LICENSE.txt
  46. 1 0
      venv/lib/python3.10/__future__.py
  47. 1 0
      venv/lib/python3.10/_collections_abc.py
  48. 1 0
      venv/lib/python3.10/_weakrefset.py
  49. 1 0
      venv/lib/python3.10/abc.py
  50. 1 0
      venv/lib/python3.10/base64.py
  51. 1 0
      venv/lib/python3.10/bisect.py
  52. 1 0
      venv/lib/python3.10/codecs.py
  53. 1 0
      venv/lib/python3.10/collections
  54. 1 0
      venv/lib/python3.10/config-3.10-darwin
  55. 1 0
      venv/lib/python3.10/copy.py
  56. 1 0
      venv/lib/python3.10/copyreg.py
  57. 1 0
      venv/lib/python3.10/encodings
  58. 1 0
      venv/lib/python3.10/enum.py
  59. 1 0
      venv/lib/python3.10/fnmatch.py
  60. 1 0
      venv/lib/python3.10/functools.py
  61. 1 0
      venv/lib/python3.10/genericpath.py
  62. 1 0
      venv/lib/python3.10/hashlib.py
  63. 1 0
      venv/lib/python3.10/heapq.py
  64. 1 0
      venv/lib/python3.10/hmac.py
  65. 1 0
      venv/lib/python3.10/imp.py
  66. 1 0
      venv/lib/python3.10/importlib
  67. 1 0
      venv/lib/python3.10/io.py
  68. 1 0
      venv/lib/python3.10/keyword.py
  69. 1 0
      venv/lib/python3.10/lib-dynload
  70. 1 0
      venv/lib/python3.10/linecache.py
  71. 1 0
      venv/lib/python3.10/locale.py
  72. 0 0
      venv/lib/python3.10/no-global-site-packages.txt
  73. 1 0
      venv/lib/python3.10/ntpath.py
  74. 1 0
      venv/lib/python3.10/operator.py
  75. 1 0
      venv/lib/python3.10/orig-prefix.txt
  76. 1 0
      venv/lib/python3.10/os.py
  77. 1 0
      venv/lib/python3.10/posixpath.py
  78. 1 0
      venv/lib/python3.10/random.py
  79. 1 0
      venv/lib/python3.10/re.py
  80. 1 0
      venv/lib/python3.10/readline.so
  81. 1 0
      venv/lib/python3.10/reprlib.py
  82. 1 0
      venv/lib/python3.10/rlcompleter.py
  83. 1 0
      venv/lib/python3.10/shutil.py
  84. BIN
      venv/lib/python3.10/site-packages/__pycache__/jsonpatch.cpython-310.pyc
  85. BIN
      venv/lib/python3.10/site-packages/__pycache__/jsonpointer.cpython-310.pyc
  86. BIN
      venv/lib/python3.10/site-packages/__pycache__/typing_extensions.cpython-310.pyc
  87. 128 0
      venv/lib/python3.10/site-packages/_distutils_hack/__init__.py
  88. BIN
      venv/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc
  89. BIN
      venv/lib/python3.10/site-packages/_distutils_hack/__pycache__/override.cpython-310.pyc
  90. 1 0
      venv/lib/python3.10/site-packages/_distutils_hack/override.py
  91. 33 0
      venv/lib/python3.10/site-packages/_yaml/__init__.py
  92. BIN
      venv/lib/python3.10/site-packages/_yaml/__pycache__/__init__.cpython-310.pyc
  93. 1 0
      venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/INSTALLER
  94. 295 0
      venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/METADATA
  95. 10 0
      venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/RECORD
  96. 4 0
      venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/WHEEL
  97. 21 0
      venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/licenses/LICENSE
  98. 432 0
      venv/lib/python3.10/site-packages/annotated_types/__init__.py
  99. BIN
      venv/lib/python3.10/site-packages/annotated_types/__pycache__/__init__.cpython-310.pyc
  100. BIN
      venv/lib/python3.10/site-packages/annotated_types/__pycache__/test_cases.cpython-310.pyc

+ 3 - 0
.idea/.gitignore

@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml

+ 6 - 0
.idea/inspectionProfiles/Project_Default.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyInterpreterInspection" enabled="false" level="WARNING" enabled_by_default="false" />
+  </profile>
+</component>

+ 6 - 0
.idea/inspectionProfiles/profiles_settings.xml

@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

+ 4 - 0
.idea/misc.xml

@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.1 (2)" project-jdk-type="Python SDK" />
+</project>

+ 9 - 0
.idea/modules.xml

@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file:///Applications/work/宇信科技/智能数据平台/llmops/.idea/llmops.iml" filepath="/Applications/work/宇信科技/智能数据平台/llmops/.idea/llmops.iml" />
+      <module fileurl="file://$PROJECT_DIR$/.idea/tx_flow_analysis.iml" filepath="$PROJECT_DIR$/.idea/tx_flow_analysis.iml" />
+    </modules>
+  </component>
+</project>

+ 9 - 0
.idea/tx_flow_analysis.iml

@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.1 (2)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module" module-name="llmops" />
+  </component>
+</module>

BIN
__pycache__/config.cpython-310.pyc


+ 141 - 0
config.py

@@ -0,0 +1,141 @@
+
# NOTE(review): this file embeds live credentials (api_key / JWT app keys).
# Move them to environment variables or a secrets manager before sharing.

# SQL lineage-parsing database configuration.
sql_parse_db_config = {
    "db_file": "/Applications/work/宇信科技/WMXT/wmxt.duckdb",
    "parse_task_concurrency": 10,  # concurrency of parse tasks
}

# General chat model and code-specialised model names.
model_name = "qwen-max-latest"
coder_model_name = "qwen3-coder-plus"

# LLM connection configuration.
llm_config = {
    "temperature": 0,
    "model": model_name,
    "base_url": "http://103.154.31.78:20001/compatible-mode/v1",
    "api_key": "IfWZqv1F55hsRWlWB2fqCca91VK9IMET",  # NOTE(review): hard-coded secret
    "coder_model": coder_model_name
}

# Word-root RAG configuration.
word_root_config = {
    "file_path": "/Users/jiaqiang/Downloads/词根V1.0.xls",
    "index_name": "words_root_idx",
    "jieba_dict": "/Applications/work/宇信科技/智能数据平台/word_root_dict.txt"
}

# Business-term mapping RAG configuration.
term_mapping_config = {
    "index_name": "term_mapping_idx",
    "file_path": "/Applications/work/宇信科技/智能数据平台/业务术语映射.xls"
}

# Schema RAG configuration.
schema_handler_config = {
    "index_name": "schema"
}

# SQL check agent configuration.
check_agent_config = {
    "dialect": "hive"
}

# Comment-generation agent configuration.
comment_agent_config = {
    "dialect": "hive",
    "top_k": 10,
    "similarity_threshold": 0.6
}

# SQL-explanation agent configuration.
explain_agent_config = {
    "top_k": 10,
    "similarity_threshold": 0.6
}

# Lineage agent configuration.
# FIX: this dict was defined twice in the original file (an earlier
# definition with concurrency=5 and a local parse_log_file path was
# silently overridden). Only the effective (last) definition is kept.
linage_agent_config = {
    "dialect": "hive",
    "concurrency": 12,
    "parse_log_file": "/home/appuser/parse_time.txt"
}

# Script-generation agent configuration.
script_agent_config = {
    "dialect": "hive",
    "rag_schema_topk": 20,
    "rag_schema_similarity": 0.3
}

# Logical-model generation agent configuration.
model_agent_config = {
    "db_dialect": "hive",
    "session_expire_time": 1800,
    "clean_interval": 600,
    "similarity_threshold": 0.4,
    "jieba_dict": "/Applications/work/宇信科技/智能数据平台/word_root_dict.txt"
}

# Business-question-to-SQL agent configuration.
biz_query_agent_config = {
    "db_dialect": "hive",
    "similarity_threshold": 0.4
}

# SQL dialect transformation agent configuration (no options yet).
transformer_agent_config = {}

# Business-mapping generation agent configuration (no options yet).
gen_biz_mapping_agent_config = {}

# NOTE(review): hard-coded JWT credentials below.
qwen32_app_key = "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZGU0YmRhODc4YzA0ZGVmYTk0NWY5ODFiNTQ5MjUxMyIsImlzcyI6ImFwaS1hdXRoLWtleSIsImV4cCI6NDkwNzU0Njk0Nn0.jCa-XVPspH3GzXuU-sm1SZJuWALgSrFI2QvQbunP50SIiBvlhjIdy7BwZ2oPsqBMoidJx0ujUlo-TGBc6Ea86yjn2J54QQb4pyKZ-r2r6II4qbk9PktqWDO6D9m2M2uide2eiYyDqIkHlYcsl6vmjw0w9I-KZntQrwrJSHgGJF5x_cqDB2iJkH41lDjMxOhNjwRIfj317U5PCoUCDK3GGRxTHwm2Eg1PWDJT1IP7AUnBa6o404ilHBR3o-bCOW2jWY8dAF67ogr8ApnT91TzCuQWrUdQ6IP3PuGiLSjedC-85bDtvgAML_APCguUf6J7K3RpwcnEIcM9u9fznOCJTw"
qwen32_url = "https://ai.sinochem.com/kunlun/ingress/api-safe/5351ea/e4079068806f428b960c0537164cc5a2/ai-fd44da9a49ea4cfc8696e20948160fb8/service-180c8450621e444e81638c9faa545875/v1"

llm_qwen32 = {
    "name": "qwencoder7",
    "app-key": qwen32_app_key,
    "url": qwen32_url
}

coder7_app_key = "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4MzY1ZmE3YzEyOTM0OGM5OTA0YTY5Mzk4NGY1ZTFiOCIsImlzcyI6ImFwaS1hdXRoLWtleSIsImV4cCI6NDkwNDUxOTc4MX0.hpO5psDAqlxnzCFSKynghiuxfiair0rkCqulhNTXOd0ZyW1acjqEWq_AyjNv6Zgce2cGenNxx9tkVYPEuGydaiCnQBOGgPjXSoKx5VNa98gxGio1ohQ8O2Rqv7-uarIGow206ROLKhmTLVs48mlk8UpQQUBwBmn9iFliREKhvVBbUNNCMyNPlDJHoh8_pQ38vQmF6MI4xu6loJh-3a6gO_pIWF025KANmoth-IgvlXjjR-1QDpuTnxt2oi_AjLy4rlWd8q9NLCWddBcHN08JlU3Yl87Vb-rHhexPMdOdkOzAZa-7hnAorrxomktLXpNH0j0VgbBBb-Vnved90F5MSg"
coder7_url = "https://ai.sinochem.com/kunlun/ingress/api-safe/5351ea/e4079068806f428b960c0537164cc5a2/ai-6d7b1fff09994a0198093a43980583ea/service-7cb26a8f786140009073a608535f0c95/v1"

llm_coder7 = {
    "name": "qwencoder7",
    "app-key": coder7_app_key,
    "url": coder7_url
}

# SQL parse client endpoints.
sql_parse_client_config = {
    "parse_url": "http://127.0.0.1:3699/api/sqllineage/parse",
    "export_table_url": "http://127.0.0.1:3699/api/sqllineage/table/export",
    "export_col_url": "http://127.0.0.1:3699/api/sqllineage/table/col/export"
}

# Transaction-flow analysis configuration.
tx_flow_analysis_config = {
    "llm": model_name
}

+ 0 - 0
llmops/__init__.py


+ 0 - 0
llmops/agents/__init__.py


+ 0 - 0
llmops/agents/datadev/__init__.py


+ 0 - 0
llmops/agents/datadev/lineage/__init__.py


+ 1298 - 0
llmops/agents/datadev/lineage/sql_lineage_agent_xmgj.py

@@ -0,0 +1,1298 @@
+
+
+from langchain_core.prompts import ChatPromptTemplate
+from langgraph.graph import START, StateGraph, END
+
+from llmops.agents.datadev.llm import get_llm, get_llm_coder
+from typing import List
+from typing_extensions import TypedDict
+from pydantic import BaseModel, Field
+from llmops.agents.datadev.memory.memory_saver_with_expiry2 import MemorySaverWithExpiry
+from langchain_core.output_parsers import PydanticOutputParser
+from config import linage_agent_config
+from llmops.agents.datadev.tools.timeit import timeit
+
+import asyncio
+from datetime import datetime
+
class Column(BaseModel):
    """
    A single column of a source table.
    """
    col_name: str = Field(description="字段名称")

    def __eq__(self, other):
        # Compare by column name only. For non-Column operands, return
        # NotImplemented (the Python idiom) so the other operand gets a
        # chance to answer, instead of claiming inequality outright.
        if isinstance(other, Column):
            return self.col_name == other.col_name
        return NotImplemented
+
class ColumnDep(BaseModel):
    """
    A target-table column together with its lineage: which source/intermediate
    columns it comes from, and how the value is derived.
    """
    col_name: str = Field(description="目标表字段名称")
    col_comment: str = Field(description="目标表字段说明")
    from_cols: List[str] = Field(description="字段来源信息,格式是 源库名.源表名.源字段名 或 中间表名.中间表字段名")
    dep_type: List[str] = Field(description="字段获取方式 1:直取 2:函数 3:表达式")
    desp: List[str] = Field(description="详细解释选取来源字段的原因")
+
class SourceTable(BaseModel):
    """
    A single source table referenced by the SQL being analysed.
    """
    database: str = Field(description="来源数据库名称")
    table_name: str = Field(description="来源表名称")
    table_name_alias: str = Field(default="", description="来源表别名")
    col_list: List[Column] = Field(description="来源表的字段集合")
    is_temp: bool = Field(description="是否是临时表")

    def __eq__(self, other):
        # Identity is (database, table_name); alias and columns are ignored.
        # Return NotImplemented for foreign types per the Python data model.
        if isinstance(other, SourceTable):
            return self.database == other.database and self.table_name == other.table_name
        return NotImplemented
+
class SourceTableList(BaseModel):
    """
    Collection of all source tables found in the SQL.
    """
    source_tables: List[SourceTable] = Field(description="所有来源表信息")

class WhereCondItem(BaseModel):
    """
    A single WHERE filter condition on one table.
    """
    database: str = Field(description="数据库名")
    table_name: str = Field(description="表原名")
    col_name: str = Field(description="条件字段名称")
    operator: str = Field(default="=", description="条件操作符,如=,>,<,in")
    value: str = Field(description="条件值")


class WhereCondList(BaseModel):
    """
    Collection of WHERE conditions.
    """
    where_list: List[WhereCondItem] = Field(description="所有的where条件")


class JoinColumnPair(BaseModel):
    """A pair of join columns (main-table column vs joined-table column)."""
    main_table_alias: str = Field(description="主表别名")
    main_column: str = Field(description="主表关联字段")
    join_table_alias: str = Field(description="从表别名")
    join_column: str = Field(description="从表关联字段")
    operator: str = Field(default="=", description="关联操作符,如=,>,<")


class JoinRelation(BaseModel):
    """A complete JOIN relation between a main table and a joined table."""
    main_database: str = Field(description="主表数据库名")
    main_table: str = Field(description="主表物理表名")
    main_alias: str = Field(description="主表别名")
    join_type: str = Field(description="JOIN类型:INNER/LEFT/RIGHT/FULL")
    join_database: str = Field(description="从表数据库名")
    join_table: str = Field(description="从表物理表名")
    join_alias: str = Field(description="从表别名")
    column_pairs: List[JoinColumnPair] = Field(description="关联字段对列表")


class JoinRelationList(BaseModel):
    """
    Collection of JOIN relations.
    """
    join_list: List[JoinRelation] = Field(description="所有的join关系")


class TargetTable(BaseModel):
    """
    The target (insert) table, including per-column lineage.
    """
    database: str = Field(default="", description="目标表的数据库名称")
    table_name: str = Field(description="目标表名称")
    col_size: int = Field(description="目标表字段数量")
    col_list: List[str] = Field(description="目标表字段集合")
    col_dep: List[ColumnDep] = Field(description="目标表字段依赖信息")
    src_table_size: int = Field(description="依赖来源表数量")


class CreateTable(BaseModel):
    """
    Information extracted from a CREATE TABLE statement.
    """
    database: str = Field(description="数据库名")
    table: str = Field(description="表名")
    col_list: list[str] = Field(description="字段名集合")
    source_table_list: list[str] = Field(description="来源表集合")


class AgentState(TypedDict):
    """
    Mutable state threaded through the LangGraph pipeline.
    """
    question: str             # SQL segment being analysed (the "user question")
    session: str              # session / thread id
    dialect: str              # database dialect
    sql: str                  # SQL statement with line numbers
    sql_type: str             # classified operation type ("1".."4")
    lineage: dict             # per-segment lineage result
    status: str               # processing status
    error: str                # error message, if any
    stop: bool                # user-abort flag
    file_name: str            # script file name
    target_table: dict        # target-table info
    source_tables: list[dict] # source-table info
    where_list: list[dict]    # WHERE-condition info
    join_list: list[dict]     # join info
+
+
class SqlLineageAgent:
    """
    Parse the data-lineage relations contained in a user-supplied SQL
    statement or SQL script.
    """
    def __init__(self):
        # Agent configuration (dialect + concurrency) from the project config.
        c = linage_agent_config
        # Database dialect (e.g. "hive").
        self.db_dialect = c["dialect"]
        # Max number of SQL segments parsed concurrently.
        self.concurrency = c["concurrency"]

        # LLM instances: a general chat model and a code-specialised model.
        self.llm = get_llm()
        self.llm_coder = get_llm_coder()

        self.memory = MemorySaverWithExpiry(expire_time=600, clean_interval=60)
        # Compiled LangGraph pipeline.
        self.graph = self._build_graph()
        # Variables assigned via SET statements found in the SQL.
        self.var_list: list[str] = []
        # NOTE(review): the accumulators below are instance state and are never
        # reset between ask_question() calls — reusing one agent instance for a
        # second script would merge results across scripts. Confirm each parse
        # gets a fresh SqlLineageAgent.
        # Final merged source tables.
        self.final_source_table_list: list[dict] = []
        # Final merged intermediate (temp) tables.
        self.final_mid_table_list: list[dict] = []
        # Final merged WHERE conditions.
        self.final_where_list: list[dict] = []
        # Final merged JOIN relations.
        self.final_join_list: list[dict] = []
        # Final merged target-table records.
        self.final_target_list: list[dict] = []

        # Final (merged) target table.
        self.final_target_table = {}

        # Target table, formatted as "database.table".
        self.target_table = ""

        # CREATE TABLE statements found in the script.
        self.ct_list = []
+
+
+    def _build_graph(self):
+        # 构造计算流程
+        graph_builder = StateGraph(AgentState)
+
+        # 添加节点
+        graph_builder.add_node("_sql_type", self._sql_type)
+        graph_builder.add_node("_extract_set", self._extract_set)
+        graph_builder.add_node("_extract_create_table", self._extract_create_table)
+        graph_builder.add_node("_invalid", self._invalid)
+        graph_builder.add_node("source", self.__extract_source)
+        graph_builder.add_node("target", self.__extract_target)
+        graph_builder.add_node("where", self.__extract_where)
+        graph_builder.add_node("join", self.__extract_join)
+        graph_builder.add_node("merge", self.__merge_result)
+
+        # 添加边
+        graph_builder.add_edge(START, "_sql_type")
+        graph_builder.add_conditional_edges("_sql_type", path=self.__parallel_route2, path_map={
+            "_invalid": "_invalid",
+            "_extract_set": "_extract_set",
+            "_extract_create_table": "_extract_create_table",
+            "where": "where",
+            "join": "join"
+        })
+        graph_builder.add_edge("_invalid", END)
+        graph_builder.add_edge("_extract_set", END)
+        graph_builder.add_edge("_extract_set", END)
+        graph_builder.add_edge("_extract_create_table", END)
+        graph_builder.add_edge("where", "source")
+        graph_builder.add_edge("join", "source")
+        graph_builder.add_edge("source", "target")
+        graph_builder.add_edge("target", "merge")
+        graph_builder.add_edge("merge", END)
+
+        return graph_builder.compile(checkpointer=self.memory)
+
    def _sql_type(self, state: AgentState):
        """
        Classify the SQL operation type via the LLM.

        Returns {"sql_type": "<digit>"} with 1=create, 2=insert,
        3=set/parameter/variable assignment, 4=other (see the prompt).
        """
        template = """
            你是数据库 {dialect} 专家,对 SQL语句: {sql} 中的操作进行分类, 直接返回分类ID(数字)
           
            ### 操作分类标准如下:
            1:create, 如建表、建视图等
            2:insert, 向表中插入数据
            3:set、设置参数操作、变量赋值操作
            4:其它
            
            不要重复用户问题,不要中间分析过程,直接回答。
        """
        pt = ChatPromptTemplate.from_template(template)
        chain = pt | self.llm
        response = chain.invoke({"dialect": state["dialect"], "sql": state["question"]})
        return {"sql_type": response.content}

    def __parallel_route(self, state: AgentState):
        """Routing helper. NOTE(review): apparently superseded by
        __parallel_route2 — it is not referenced from _build_graph."""
        status = state["status"]
        if status == "invalid" or status == "canceled" or state["sql_type"] == "3":  # invalid question or user abort
            return END

        return "continue"

    def __parallel_route2(self, state: AgentState):
        """Route on the classified sql_type; insert-like statements fan out
        to the parallel where+join extraction nodes."""
        if state["sql_type"] == "4":  # neither create nor insert
            return "_invalid"
        if state["sql_type"] == "3":  # parameter/variable assignment
            return "_extract_set"
        if state["sql_type"] == "1":  # create table
            return "_extract_create_table"

        return ["where", "join"]
+
+    def _invalid(self, state: AgentState):
+        """
+        sql_type == 4,不关注的语句,直接跳过
+        """
+        return {
+            "lineage": {
+                "source_tables": [],
+                "join_list": [],
+                "where_list": [],
+                "target_table": {}
+            }
+        }
+
    async def _extract_set(self, state: AgentState):
        """
        Extract parameter/variable assignments (SET statements) from the SQL.

        Appends the LLM's answer to self.var_list and returns an empty
        lineage payload so this segment contributes nothing to the merge.
        """
        if state["sql_type"] != "3":
            return state

        template = """
            你是数据库 {dialect} SQL分析专家,从SQL语句: {sql} 中提取 参数设置的全部内容,包括变量赋值、注释。
            不要重复用户问题,不需要中间思考过程,直接回答。
        """
        pt = ChatPromptTemplate.from_template(template)
        chain = pt | self.llm_coder
        response = await chain.ainvoke({"dialect": state["dialect"], "sql": state["question"]})
        self.var_list.append(response.content)
        return {
            "lineage": {
                "source_tables": [],
                "join_list": [],
                "where_list": [],
                "target_table": {}
            }
        }


    async def _extract_create_table(self, state: AgentState):
        """
        Extract CREATE TABLE information (database, table, columns, source
        tables) with the coder LLM, parsed into a CreateTable record.

        The record is appended to self.ct_list; the returned lineage
        payload is empty.
        """
        if state["sql_type"] != "1":
            return state

        template = """
            你是数据库 {dialect} SQL分析专家,从SQL语句: {sql} 中提取 建表信息。
            
            ### 提取要求
            1、提取建表对应的数据库名(无则留空)
            2、提取建表对应的表名
            3、提取建表对应的字段名
            4、提取建表对应的来源表名,来源表名格式是 来源库库名.来源表名
            
            ### 输出规范
            {format_instructions}
            
            不要重复用户问题,不需要中间思考过程,直接回答。
        """
        parser = PydanticOutputParser(pydantic_object=CreateTable)
        pt = ChatPromptTemplate.from_template(template).partial(format_instructions=parser.get_format_instructions())
        chain = pt | self.llm_coder | parser
        response = await chain.ainvoke({"dialect": state["dialect"], "sql": state["question"]})
        # Debug trace of the extracted record.
        print(f"extract-create-table: {response}")
        self.ct_list.append(response.model_dump())
        return {
            "lineage": {
                "source_tables": [],
                "join_list": [],
                "where_list": [],
                "target_table": {}
            }
        }
+
+
    async def _parse(self, semaphore, sql: str, session: str, dialect: str, file_name: str = "SQL_FILE.SQL"):
        """
        Parse the lineage of one SQL segment through the compiled graph.

        :param semaphore: asyncio.Semaphore bounding concurrent segment parses
        :param sql:       the SQL segment text
        :param session:   session id, used as the graph's thread_id
        :param dialect:   database dialect; falls back to self.db_dialect
        :param file_name: originating script name (metadata only)
        """
        async with semaphore:  # limit the number of in-flight parses
            config = {"configurable": {"thread_id": session}}
            # Run the LangGraph pipeline for this segment.
            response = await self.graph.ainvoke({
                "question": sql,
                "session": session,
                "dialect": dialect or self.db_dialect,
                "file_name": file_name,
                "stop": False,
                "error": "",
                "sql_type": "0",
                "status": "success"}, config)
            return response

    async def find_target_table(self, sql: str, dialect: str):
        """
        Ask the LLM to identify the final target (insert-into) table of the
        whole script. Returns "DATABASE.TABLE" upper-cased (database part may
        be empty).
        """
        template = """
            你是SQL数据血缘分析专家,参照数据库方言{dialect},仔细分析SQL语句,找出目标表并返回。
            ### SQL片段如下
            {sql}
            
            ### 目标表标准
            1、目标表一定是出现在 insert into 后的 表
            2、最后 insert into 后的表为目标表
            3、目标表只有一张
           
            ### 核心要求
            1、目标表的返回格式是 数据库名.表名 (如果无数据库名,则留空)
            2、SQL语句中可能包括多个insert into语句,但要从全局分析,找出最终插入的目标表
            
            不要重复用户问题、不要中间分析过程,直接回答。
        """
        pt = ChatPromptTemplate.from_template(template)
        chain = pt | self.llm_coder
        response = await chain.ainvoke({"sql": sql, "dialect": dialect})
        return response.content.strip().upper()
+
+    def split_sql_statements(self, sql):
+        """
+        将SQL语句按分号分段,同时正确处理字符串和注释中的分号
+        参数:
+            sql: 包含一个或多个SQL语句的字符串
+        返回:
+            分割后的SQL语句列表
+        """
+        # 状态变量
+        in_single_quote = False
+        in_double_quote = False
+        in_line_comment = False
+        in_block_comment = False
+        escape_next = False
+
+        statements = []
+        current_start = 0
+        i = 0
+
+        while i < len(sql):
+            char = sql[i]
+
+            # 处理转义字符
+            if escape_next:
+                escape_next = False
+                i += 1
+                continue
+
+            # 处理字符串和注释中的情况(这些地方的分号不应该作为分隔符)
+            if in_line_comment:
+                if char == '\n':
+                    in_line_comment = False
+            elif in_block_comment:
+                if char == '*' and i + 1 < len(sql) and sql[i + 1] == '/':
+                    in_block_comment = False
+                    i += 1  # 跳过下一个字符
+            elif in_single_quote:
+                if char == "'":
+                    in_single_quote = False
+                elif char == '\\':
+                    escape_next = True
+            elif in_double_quote:
+                if char == '"':
+                    in_double_quote = False
+                elif char == '\\':
+                    escape_next = True
+            else:
+                # 不在字符串或注释中,检查是否进入这些状态
+                if char == "'":
+                    in_single_quote = True
+                elif char == '"':
+                    in_double_quote = True
+                elif char == '-' and i + 1 < len(sql) and sql[i + 1] == '-':
+                    in_line_comment = True
+                    i += 1  # 跳过下一个字符
+                elif char == '/' and i + 1 < len(sql) and sql[i + 1] == '*':
+                    in_block_comment = True
+                    i += 1  # 跳过下一个字符
+                elif char == ';':
+                    # 找到真正的分号分隔符
+                    statement = sql[current_start:i].strip()
+                    if statement:
+                        statements.append(statement)
+                    current_start = i + 1
+
+            i += 1
+
+        # 添加最后一个语句(如果没有以分号结尾)
+        if current_start < len(sql):
+            statement = sql[current_start:].strip()
+            if statement:
+                statements.append(statement)
+
+        return statements
+
+    async def find_target_table2(self, results: list[dict]) -> str:
+        """
+        根据各段解析结果查找出目标表
+        规则:一个段的目标表如果没有出现在其它各段的来源表中,即为目标表
+        """
+        target_table = ""
+        exists = False
+        for i, item in enumerate(results):
+            tt = item["lineage"]["target_table"]
+            # 取一个目标表
+            db = tt.get("database", "").strip().upper()
+            table = tt.get("table_name", "").strip().upper()
+            exists = False
+            if len(table) > 0:
+                # 从源表里面查找
+                for j, item2 in enumerate(results):
+                    if i != j:  #  不跟自身比
+                        st_list = item2["lineage"]["source_tables"]
+                        for st in st_list:
+                            sdb = st.get("database", "").strip().upper()
+                            stable = st.get("table_name", "").strip().upper()
+                            if db == sdb and table == stable: # 目标表在源表中存在
+                                exists = True
+                                break
+                        if exists:
+                            break
+                if not exists: # 说明目标表不在其它各段的源表中存在
+                    target_table = ".".join([db, table])
+                    break
+
+        return target_table.strip().upper()
+
+    async def ask_question(self, sql_content: str, session: str, dialect: str, owner: str, sql_file: str = "SQL_FILE.SQL"):
+        """
+        功能:根据用户问题,解析SQL中的数据血缘关系。先分段解析,再组织合并。
+        :param sql_content: SQL内容
+        :param session:     会话
+        :param dialect:     数据库方言
+        :param owner:       脚本平台属主
+        :param var_map:     变量关系
+        :sql_file:          脚本名称
+        """
+
+
+        # 通过session保持记忆
+        t1 = datetime.now()
+        # 最终解析结果
+        final_result = {}
+        final_result["task_id"] = session
+        final_result["file_name"] = sql_file
+        final_result["owner"] = owner
+        final_result["status"] = "success"
+        final_result["error"] = ""
+        lineage = {}
+
+        try:
+            # 根据全局SQL找出目标表
+            self.target_table = await self.find_target_table(sql_content, dialect)
+            print(f"大模型查找目标表:{self.target_table}, {len(self.target_table)}")
+
+            # 对SQL分段,并发执行解析
+            sql_list = self.split_sql_statements(sql=sql_content)
+            print("SQL分段数量:", len(sql_list))
+
+            # 信号量,控制并发量
+            semaphore = asyncio.Semaphore(self.concurrency)
+            task_list = [self._parse(semaphore=semaphore, sql=sql, session=session, dialect=dialect, file_name=sql_file) for sql in sql_list]
+            # 并发分段解析
+            results = await asyncio.gather(*task_list)
+
+            if len(self.target_table) == 0:
+                # 从各段解析结果中查找出目标表
+                self.target_table = await self.find_target_table2(results) or ""
+                print(f"从血缘关系中找出目标表:{self.target_table}")
+                if not self.target_table:  # 未出现目标表, 直接返回
+                    final_result["status"] = "error",
+                    final_result["error"] = "未找到目标表"
+                    return final_result
+
+            for response in results:
+                # 来源表、where、join、target
+                self._merge_source_tables(response["lineage"]["source_tables"])
+                self._merge_where(response["lineage"].get("where_list",[]))
+                self._merge_join(response["lineage"]["join_list"])
+                # 合并目标表
+                self._merge_target(response["lineage"]["target_table"])
+
+            # 增加中间表标识
+            self._add_mid_table_label(self.final_source_table_list, self.final_mid_table_list)
+            # 补充中间表依赖字段
+            self._add_mid_table_col(self.final_mid_table_list, self.final_target_list)
+
+            # 多目标表合并
+            self.final_target_table["src_table_size"] = len(self.final_source_table_list)
+            lineage["target_table"] = self.final_target_table
+            lineage["source_tables"] = self.final_source_table_list
+            lineage["mid_table_list"] = self.final_mid_table_list
+            lineage["join_list"] = self.final_join_list
+            lineage["where_list"] = self.final_where_list
+            final_result["lineage"] = lineage
+        except Exception as e:
+            final_result["status"] = "error"
+            final_result["error"] = str(e)
+            print(str(e))
+
+        t2 = datetime.now()
+        print("总共用时(sec):", (t2-t1).seconds)
+        parse_time = (t2 - t1).seconds
+        # 解析时间(秒)
+        final_result["parse_time"] = parse_time
+        final_result["parse_end_time"] = t2.strftime("%Y-%m-%d %H:%M:%S")
+        return final_result
+
    def _merge_source_tables(self, source_table_list: list[dict]):
        """
        Merge one segment's source tables into self.final_source_table_list.

        Tables are keyed by upper-cased (database, table_name); for an
        already-known table the column lists are merged with case-insensitive
        de-duplication on col_name. The stored dicts are mutated in place.
        """
        if len(source_table_list) > 0:
            # Keys of the tables collected so far.
            table_keys = [(t.get("database","").strip().upper(), t.get("table_name","").strip().upper()) for t in self.final_source_table_list]
            for st in source_table_list:
                # Build this table's (db, table) key.
                db = st.get("database","").strip().upper()
                table = st.get("table_name","").strip().upper()
                key = (db, table)
                if key not in table_keys:
                    table_keys.append(key)
                    self.final_source_table_list.append(st)
                else:
                    # Known table: merge its columns into the stored entry.
                    col_list = st.get("col_list", [])
                    # Locate the stored entry with the same db + table name.
                    e = next(filter(lambda item: item["database"].upper()==db and item["table_name"].upper()==table, self.final_source_table_list), None)
                    if e:
                        final_col_list = e.get("col_list",[])
                        col_keys = [(c.get("col_name","").upper()) for c in final_col_list]
                        for c in col_list:
                            ck = (c["col_name"].upper())
                            # `len(c) > 0` also skips empty column dicts.
                            if ck not in col_keys and len(c) > 0:
                                col_keys.append(ck)
                                final_col_list.append(c)
+
+    @timeit
+    def _merge_where(self, where_list: list[dict]):
+        """
+        最终合并所有的where条件
+        """
+        if where_list and len(where_list) > 0:
+            self.final_where_list += where_list
+
+
+    def _merge_join(self, join_list: list[dict]):
+        """
+        最终合并所有的join条件
+        """
+        if len(join_list) > 0:
+            self.final_join_list += join_list
+
+
    def _merge_target(self, mid_target_table: dict):
        """
        Merge one segment's parsed target table into the final result.

        Only a table whose qualified name equals ``self.target_table`` is the
        real target; the real target may appear several times (multiple
        INSERTs), so its columns and column lineage are merged.  Any other
        segment "target" is an intermediate table and is collected into
        ``self.final_mid_table_list``.
        """
        if not mid_target_table:  # nothing parsed for this segment
            return
        else:  # merge the segment target into the final target
            db = mid_target_table.get("database", "").strip().upper()
            table = mid_target_table.get("table_name", "").strip().upper()
            col_size = mid_target_table.get("col_list",[])  # NOTE(review): holds the col_list itself, not a size
            if len(table) == 0 or len(col_size) == 0:  # no table name or no columns: ignore
                return
            if len(db) == 0:  # database missing: the table name may embed it as "db.table"
                arr = table.split('.')
                if len(arr) == 2:  # table name carried the db prefix; repair both fields
                    mid_target_table["database"] = arr[0].strip().upper()
                    mid_target_table["table_name"] = arr[1].strip().upper()
                    db = arr[0].strip().upper()
                    table = arr[1].strip().upper()
                elif len(arr) == 1:  # bare table name: borrow the real target table's db
                    arr = self.target_table.split(".")
                    if len(arr) == 2:
                        db = mid_target_table["database"] = arr[0].strip().upper()

            key = ".".join([db, table])

            if key == self.target_table:  # this segment writes the real target table
                print(f"合并目标表:{mid_target_table}")
                if not self.final_target_table:  # first occurrence: adopt as-is
                    self.final_target_table = mid_target_table
                    return

                # Merge column names and lineage into the final target.
                # 1) add columns not present yet
                new_col_list = []  # columns newly added in this pass
                col_list = mid_target_table.get("col_list", [])
                for new_col in col_list:
                    # membership test ignores case and surrounding whitespace
                    if new_col.strip().upper() not in [col.strip().upper() for col in self.final_target_table.get("col_list", [])]:  # not present yet
                        print(f"合并新字段:{new_col}")
                        self.final_target_table.get("col_list", []).append(new_col.strip().upper())
                        new_col_list.append(new_col.strip().upper())

                # 2) merge column lineage (col_dep)
                col_dep_list = mid_target_table.get("col_dep", [])
                for col_dep in col_dep_list:
                    col_name = col_dep["col_name"].strip().upper()
                    if col_name in new_col_list:  # brand-new column: append its lineage directly
                        self.final_target_table.get("col_dep",[]).append(col_dep)
                    else:  # existing column: merge the source-column lists
                        # find the lineage entry with the same column name
                        for dep in self.final_target_table.get("col_dep", []):
                            cn = dep["col_name"].strip().upper()
                            if col_name == cn:
                                print(f"合并来源字段, {col_name}")
                                m_from_col = col_dep.get('from_cols', [])
                                f_from_col = dep.get("from_cols", [])
                                print(f"中间表来源字段:{m_from_col}")
                                print(f"目标表来源字段:{f_from_col}")
                                # fold the segment's source columns into the final ones
                                self._merge_col_dep(m_from_col, f_from_col)
            else:
                # not the real target: record as an intermediate table
                print(f"加入中间表:{mid_target_table}")
                self.final_mid_table_list.append(mid_target_table)

            print(f"final tt:{self.final_target_table}")
+
+    def _merge_col_dep(self, mid_from_cols: list[str], final_from_cols: list[str]):
+        """
+        合并临时中间表来源字段 到目标表来源字段中
+        """
+        for from_col in mid_from_cols:
+            if len(from_col.split(".")) > 0: # 忽略常量,NULL值
+                if from_col.strip().upper() not in [fc.strip().upper() for fc in final_from_cols]:
+                    print(f"合并字段:{from_col}")
+                    final_from_cols.append(from_col.strip().upper())
+
+    @staticmethod
+    @timeit
+    def _add_mid_table_label(source_table_list: list[dict], mid_table_list: list[dict]):
+        """
+        给最终的来源表增加中间表标识
+        """
+        if len(source_table_list) > 0 and len(mid_table_list) > 0:
+            mid_table_keys = [(t["database"].strip().upper(), t["table_name"].strip().upper()) for t in mid_table_list]
+            for st in source_table_list:
+                st["is_temp"] = False
+                # 将字典转换为元组
+                key = (st["database"].strip().upper(), st["table_name"].strip().upper())
+                if key in mid_table_keys:  # 是中间表
+                    st["is_temp"] = True
+
+    @staticmethod
+    @timeit
+    def _add_mid_table_col(mid_table_list: list[dict], target_table_list: list[dict]):
+        """
+        给中间表补充来源字段,使用对应的目标表col_dep去替换
+        """
+        if len(mid_table_list) > 0 and len(target_table_list) > 0:
+            for mt in mid_table_list:
+                for tt in target_table_list:
+                    tt["is_temp"] = False
+                    # 目标表的数据库名与表名相同
+                    tdn = tt["database"].strip().upper()
+                    ttn = tt["table_name"].strip().upper()
+                    if len(tdn) == 0 and len(ttn) == 0: # 全部为空
+                        tt["is_temp"] = True
+                        continue
+                    if mt["database"].strip().upper()==tdn and mt["table_name"].strip().upper()==ttn:
+                        mt["col_dep"] = tt["col_dep"]
+                        tt["is_temp"] = True
+                        if len(mt["col_list"]) == 0:
+                            mt["col_list"] = [c.upper() for c in tt["col_list"]]
+                            mt["col_size"] = len(mt["col_list"])
+
+    @timeit
+    def _merge_final_target_table(self, seg_target_table_list: list[dict], target_table_name: str):
+        """
+        合并分段目标表内容
+        """
+        # 可能存在多个相同的真正目标表(多次插入目标表情况)
+        for tt in seg_target_table_list:
+            tt["is_target"] = False
+            database = tt["database"].strip()
+            table = tt["table_name"].strip()
+            if len(database) > 0:
+                dt = ".".join([database,table])
+            else:
+                dt = table
+            if dt.upper() == target_table_name.strip().upper():
+                tt["is_target"] = True
+
+        # 合并真正的目标表
+        i = 0
+        result = {}
+        final_col_list: list[str] = []
+        final_col_dep_list: list[dict] = []
+        # 合并目标表信息
+        for target_table in seg_target_table_list:
+            if target_table["is_target"]:
+                if i == 0:
+                    result["database"] = target_table["database"]
+                    result["table_name"] = target_table["table_name"]
+                    i = 1
+                # 合并列信息
+                col_list = target_table["col_list"]
+                for c in col_list:
+                    if c not in final_col_list and len(c.strip()) > 0:
+                        final_col_list.append(c.upper())
+
+                # 合并列来源信息
+                col_dep_list = target_table["col_dep"]
+                for col_dep in col_dep_list:
+                    cn = col_dep["col_name"].strip().upper()
+                    from_cols = col_dep["from_cols"]
+                    existed = False
+                    if len(cn) > 0:
+                        for fcd in final_col_dep_list:
+                            if cn == fcd["col_name"].upper():
+                                existed = True
+                                # 合并来源字段
+                                final_from_cols = fcd["from_cols"]
+                                fcd["from_cols"] += [c.upper() for c in from_cols if c.upper() not in final_from_cols]
+                        if not existed:
+                            final_col_dep_list.append({
+                                "col_name": cn.upper(),
+                                "col_comment": col_dep["col_comment"],
+                                "from_cols": [c.upper() for c in from_cols]
+                            })
+
+        result["col_size"] = len(final_col_list)
+        result["col_list"] = final_col_list
+        result["col_dep"] = final_col_dep_list
+
+        return result
+
    async def __check(self, state: AgentState):
        """
        Validate that the user's input is a syntactically correct SQL
        statement/fragment for the configured dialect.

        Streams the LLM verdict ("1" = valid, otherwise an explanation ending
        in "0") and polls the session checkpoint between chunks so an external
        trigger_stop() can cancel the run.
        """
        template = """
            你是 数据库 {dialect} SQL分析助手, 仔细分析SQL语句: {sql} ,判断是否语法正确,如果是 直接返回 1,否则说明错误地方,并返回 0。 
            
            ### 分析要求
            1. 判断子查询的正确性
            2. 判断整体语句的正确性
            3. 如果存在语法错误,给出说明
            
            不要重复用户问题、不要分析过程、直接回答。
        """
        pt = ChatPromptTemplate.from_template(template)
        chain = pt | self.llm
        stopped = False
        response = ""
        config = {"configurable": {"thread_id": state["session"]}}
        async for chunk in chain.astream({"sql": state["question"], "dialect": state["dialect"]}, config=config):
            # Poll the checkpoint so a concurrent trigger_stop() aborts mid-stream.
            current_state = self.memory.get(config)
            if current_state and current_state["channel_values"]["stop"]:
                stopped = True
                break
            response += chunk.content

        if stopped:
            return {"stop": True, "error": "用户中止", "status": "canceled", "lineage": {}}

        # The model is instructed to answer exactly "0" for invalid SQL.
        if response == "0":
            return {"status": "invalid", "error": "无效问题或SQL存在语法错误,请重新描述!", "lineage": {}}

        return {"status": "success"}
+
+    async def __merge_result(self, state: AgentState):
+        """
+        合并所有节点结果,输出血缘关系
+        """
+        result = {}
+        result["file_name"] = state["file_name"]
+        source_tables = state["source_tables"]
+        target_table = state["target_table"]
+        # 设置目标表依赖的来源表数量和字段数量
+        target_table["src_table_size"] = len(source_tables)
+        target_table["col_size"] = len(target_table["col_dep"])
+
+        result["target_table"] = target_table
+        result["source_tables"] = source_tables
+        result["where_list"] = state.get("where_list",[])
+        result["join_list"] = state["join_list"]
+
+        return {"lineage": result}
+
    async def __extract_source(self, state: AgentState):
        """
        Extract the source tables (and their columns) referenced by the SQL
        segment, using the coder LLM with Pydantic-parsed output.

        Streams the chain, polling the session checkpoint so an external
        trigger_stop() can cancel; duplicate tables in the parsed output are
        merged (including their column lists) before returning.
        """
        dt1 = datetime.now()

        # Default database for unqualified table names: prefer the db of the
        # recognized target table, otherwise "TMP".
        default_db = "TMP"
        if len(self.target_table) > 0:
            arr = self.target_table.split(".")
            if len(arr) == 2:  # use the recognized target table's db
                default_db = arr[0]

        template = """
            你是SQL数据血缘分析专家,仔细分析SQL片段和上下文信息, 从SQL片段中提取出 来源表信息。
            
            ### SQL和上下文信息如下
            - SQL片段:{sql}
            - 数据库方言: {dialect}
            - 变量设置信息:{var}
            - 已有的建表信息: {create_table_info}
           
            ### 核心要求:
            1. 提取 来源表数据库名称(无则使用 {default_db} 填充)
            2. 提取 来源表名,包括 from子句、子查询、嵌套子查询、union语句、with语句中出现的物理表名(注意:不要带库名前缀)
            3. 提取 来源表别名(无则留空)
            4. 提取来源表的所有字段信息,提取来自几种:
            - 从select中提取,如果带有表别名,则转换成物理表名
            - 从where过滤条件中提取
            - 从关联条件(inner join,left join,right join,full join)中提取
            - 从group中提取
            - 从函数参数中提取,比如函数 COALESCE(T13.ECIF_CUST_ID,T2.RELAT_PTY_ID,'') AS ECIF_CUST_ID,提取出T13.ECIF_CUST_ID和T2.RELAT_PTY_ID
            - 从表达式参数中提取
            5. 判断来源表是否是临时表
            6. 不要包含目标表(目标表指最终插入的表)
            
            ### 输出规范
            {format_instructions}
            
            不要重复用户问题,不要分析中间过程,直接给出答案。
        """
        parser = PydanticOutputParser(pydantic_object=SourceTableList)
        pt = ChatPromptTemplate.from_template(template=template).partial(format_instructions=parser.get_format_instructions())
        chain = pt | self.llm_coder | parser
        answer = {}
        config = {"configurable": {"thread_id": state["session"]}}

        stopped = False
        # NOTE(review): unlike __check, config is not passed to astream here —
        # confirm whether that is intentional.
        async for chunk in chain.astream({
            "sql": state["question"],
            "dialect": state["dialect"],
            "var": self.var_list or [],
            "default_db": default_db,
            "create_table_info": self.ct_list
        }):
            # Poll the checkpoint so trigger_stop() can cancel mid-stream.
            current_state = self.memory.get(config)
            if current_state and current_state["channel_values"]["stop"]:
                stopped = True
                break
            # Merge duplicate source tables from the parsed chunk.
            table_list: list[SourceTable] = []
            for table in chunk.source_tables:
                if table not in table_list:
                    table_list.append(table)
                else:
                    idx = table_list.index(table)
                    tt = table_list[idx]
                    # merge the duplicate table's columns
                    for col in table.col_list:
                        if col not in tt.col_list:
                            tt.col_list.append(col)

            answer["source_tables"] = [table.model_dump() for table in table_list]

        dt2 = datetime.now()
        print("提取源表用时(sec):", (dt2-dt1).seconds)
        if stopped:
            return {"status": "canceled", "error": "用户中止", "lineage": {}}

        return answer
+
+
+    async def __extract_target(self, state: AgentState):
+        """
+        根据SQL,提取其中的目标表信息
+        """
+        if state["sql_type"] == "1":
+            return {"target_table": {"database":"", "table_name": "", "col_list": [], "col_size": 0, "col_dep": []}}
+
+        # 默认DB
+        default_db = "TMP"
+        if len(self.target_table) > 0:
+            arr = self.target_table.split(".")
+            if len(arr) == 2: # 使用识别出的目标表db作为默认DB
+                default_db = arr[0]
+
+        template = """
+            你是SQL数据血缘分析专家,仔细分析SQL片段和以下上下文:
+            - SQL片段: {sql}
+            - 来源表信息: {source_tables}
+            - 数据库方言: {dialect}
+            - 变量设置信息:{var}
+            - 已有的建表信息: {create_table_info}
+            
+            ### 核心要求:
+            1. **目标表识别**:
+               - 提取 INSERT INTO 后的表作为目标表
+               - 格式:`数据库名.表名`(未指定库名则使用 {default_db} 填充)     
+            2. **字段依赖分析**:
+               - 目标表字段必须溯源到来源表的物理字段
+               - 字段获取方式分类:
+                 - `直取`:直接引用源字段(如 `src.col`)
+                 - `函数`:通过函数转换(如 `COALESCE(col1, col2)`)
+                 - `表达式`:计算表达式(如 `col1 + col2`)
+                 - `常量`:固定值(如 `'2023'`)
+                 - `子查询`:嵌套SELECT结果
+               - 字段来源格式:`源库.源表.字段名`(不要使用表别名,如果源库未指定则使用 {default_db}替换)
+               - 字段来源信息只能包含 库名、表名和字段信息,如果经过函数或表达式处理,则从参数中提取出具体的字段信息
+            3. **关键约束**:
+               - 每个目标字段必须有详细的来源说明
+               - 函数参数需完整展开(如 `COALESCE(T1.id, T2.id)` 需解析所有参数)
+               - 表名必须是 英文字符 构成(非汉字构成)
+            
+            ### 输出规范
+            {format_instructions}
+            
+            请直接输出JSON格式结果,无需解释过程。
+        """
+        parser = PydanticOutputParser(pydantic_object=TargetTable)
+        pt = ChatPromptTemplate.from_template(template).partial(format_instructions=parser.get_format_instructions())
+        chain = pt | self.llm_coder | parser
+        answer = {}
+        config = {"configurable": {"thread_id": state["session"]}}
+        dt1 = datetime.now()
+        print("开始解析目标表...")
+
+        stopped = False
+        async for chunk in chain.astream({
+            "sql": state["question"],
+            "dialect": state["dialect"],
+            "source_tables": state["source_tables"],
+            "var": self.var_list or [],
+            "default_db": default_db,
+            "default_db": default_db,
+            "create_table_info": self.ct_list
+        }):
+            current_state = self.memory.get(config)
+            if current_state and current_state["channel_values"]["stop"]:
+                stopped = True
+                break
+            answer["target_table"] = chunk.model_dump()
+
+        if stopped:
+            return {"status": "canceled", "error": "用户中止", "lineage": {}}
+
+        dt2 = datetime.now()
+        print("提取目标表用时(sec):", (dt2 - dt1).seconds)
+        print(f"target:{answer}")
+        return answer
+
+
+    async def __extract_where(self, state: AgentState):
+        """
+        根据SQL,提取来源表 where 信息
+        """
+        # 默认DB
+        default_db = "TMP"
+        if len(self.target_table) > 0:
+            arr = self.target_table.split(".")
+            if len(arr) == 2:  # 使用识别出的目标表db作为默认DB
+                default_db = arr[0]
+
+        dt1 = datetime.now()
+        template = """
+            你是SQL数据血缘分析专家,仔细分析 SQL语句和上下文,提取 where 条件信息。
+            ### SQL语句和上下文如下
+            - SQL语句: {sql}
+            - 数据库方言: {dialect}
+            - 变量设置信息: {var}
+            
+            ### 提取要求:
+            1. 提取 所有 来源表 中的 where 条件信息,包含 where 中出现的 表原名(非别名)、 表所在数据库名(如果没有则使用 {default_db} 填充)、字段名、条件操作符和条件值
+            2. 字段名不能包括函数,比如 length(INT_ORG_NO),提取的字段名是 INT_ORG_NO
+            3. 如果SQL语句中存在变量,则根据变量设置信息转换成实际值
+            
+            ### 输出规范
+            {format_instructions}
+            
+            不要重复用户问题,不要分析中间过程,直接给出答案。
+        """
+        parser = PydanticOutputParser(pydantic_object=WhereCondList)
+        pt = ChatPromptTemplate.from_template(template).partial(format_instructions=parser.get_format_instructions())
+
+        chain = pt | self.llm_coder | parser
+        answer = {}
+        config = {"configurable": {"thread_id": state["session"]}}
+        stopped = False
+
+        async for chunk in chain.astream({
+            "sql": state["question"],
+            "dialect": state["dialect"],
+            "var": self.var_list or [],
+            "default_db": default_db
+        }):
+            current_state = self.memory.get(config)
+            if current_state and current_state["channel_values"]["stop"]:
+                stopped = True
+                break
+            # 设置目标表字段数量,来源表数量
+        dt2 = datetime.now()
+        if stopped:
+            return {"status": "canceled", "error": "用户中止", "lineage": {}}
+        return answer
+
    async def __extract_join(self, state: AgentState):
        """
        Extract table-JOIN relations from the SQL segment via the coder LLM.

        Returns {"join_list": [...]} on success, or a canceled status when the
        session's stop flag is raised while streaming.
        """
        # Default database for unqualified table names: prefer the db of the
        # recognized target table, otherwise "TMP".
        default_db = "TMP"
        if len(self.target_table) > 0:
            arr = self.target_table.split(".")
            if len(arr) == 2:  # use the recognized target table's db
                default_db = arr[0]

        dt1 = datetime.now()  # NOTE(review): dt1/dt2 are never printed or returned
        template = """
            你是SQL数据血缘分析专家,负责提取完整的JOIN关系。仔细分析SQL片段:
            - SQL片段: {sql}
            - 数据库方言: {dialect}
    
            ### 关键要求:
            1. **SQL语句中必须存在关联条件(包括inner、left join、right join、full join)**
            2. **识别JOIN结构**:
               - 提取FROM子句中的主表信息(包括 数据库名和物理表名),如果无数据库名,则使用 {default_db} 填充
               - 提取每个JOIN子句中的从表信息(包括 数据库名和物理表名),如果无数据库名,则使用 {j_default_db} 填充
               - 明确标注JOIN类型(INNER/LEFT/RIGHT/FULL)
            3. **关联字段提取**:
               - 必须提取每对关联字段的完整信息:
                 * 主表端:表别名.字段名
                 * 从表端:表别名.字段名
               - 明确标注关联操作符(=, >, <等)
               - 多条件JOIN拆分为多个字段对
            4. **特殊场景处理**:
                - 子查询作为表时,表名填写子查询别名
                - 隐式JOIN(WHERE条件)需转换为显式JOIN结构
                - 多表JOIN时保持原始顺序
                - 如果关联字段是 常量,则关联字段取空
                - 如果关联字段经函数处,则提取参数中的 字段,如果参数是常量,则关联字段取空
                
            ### 输出规范
            {format_instructions}

            不要重复用户问题,不要中间思考过程,直接回答。
        """
        parser = PydanticOutputParser(pydantic_object=JoinRelationList)
        pt = ChatPromptTemplate.from_template(template).partial(format_instructions=parser.get_format_instructions())

        chain = pt | self.llm_coder | parser
        answer = {}
        config = {"configurable": {"thread_id": state["session"]}}

        stopped = False

        async for chunk in chain.astream({
            "sql": state["question"],
            "dialect": state["dialect"],
            "default_db": default_db,
            "j_default_db": default_db
        }):
            # Poll the checkpoint so trigger_stop() can cancel mid-stream.
            current_state = self.memory.get(config)
            if current_state and current_state["channel_values"]["stop"]:
                stopped = True
                break
            # Keep only the latest parsed chunk's join relations.
            answer["join_list"] = [join.model_dump() for join in chunk.join_list]

        dt2 = datetime.now()
        if stopped:
            return {"status": "canceled", "error": "用户中止", "lineage": {}}

        return answer
+
+    def trigger_stop(self, session: str):
+        """外部触发中止"""
+        config = {"configurable": {"thread_id": session}}
+        current_state = self.memory.get(config)
+        if current_state:
+            current_state["channel_values"]["stop"] = True
+
+
async def main(sql_content: str, sql_file: str, dialect: str, owner: str):
    """Run the lineage agent once over *sql_content* and return its result."""
    agent = SqlLineageAgent()
    return await agent.ask_question(
        sql_content=sql_content,
        session="s-1",
        dialect=dialect,
        sql_file=sql_file,
        owner=owner,
    )
+
+
def read_var_file(var_file: str):
    """
    Read the variable-definition workbook.

    Expects an Excel file with columns ``变量`` (entries like ``NAME=VALUE``)
    and ``含义`` (description).

    :param var_file: path to the Excel (.xlsx) file
    :return: list of {"var_name", "var_value", "var_comment"} dicts
    """
    import pandas as pd

    result = []
    df = pd.read_excel(var_file)
    for row in df.itertuples():
        print(f"变量 {row.Index + 1}: {row.变量}, {row.含义}")
        # Split on the FIRST '=' only: the original split("=")[1] truncated
        # any value that itself contained '='.
        name, _, value = row.变量.partition("=")
        result.append({
            "var_name": name,
            "var_value": value,
            "var_comment": row.含义,
        })
    return result
+
def replace_vars(sql_file: str, var_list: list[dict]):
    """
    Read *sql_file* and substitute every variable occurrence.

    Tries a list of candidate encodings in order and uses the first one that
    decodes the file successfully.

    :param sql_file: SQL file path
    :param var_list: variables, [{'var_name': .., 'var_value': .., 'var_comment': ..}]
    :return: the file content with all variables replaced
    :raises Exception: if no candidate encoding can decode the file
    """
    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252', 'gbk', 'gb2312']
    for encoding in encodings:
        try:
            with open(sql_file, 'r', encoding=encoding) as f:
                content = f.read()
            # Apply substitutions in place.  Fix: the original returned the
            # *last* replacement buffer, which stayed "" (empty) whenever
            # var_list was empty — losing the file content entirely.
            for var in var_list:
                content = content.replace(var["var_name"], var["var_value"])
            return content
        except UnicodeDecodeError:
            print(f"文件{os.path.basename(sql_file)} 编码 {encoding} 不支持")
            continue
    print("无法找到合适的编码")
    raise Exception(f"无法读取文件 {sql_file}, 未找到合适的编码.")
+
+
def get_ddl_list(ddl_dir: str, var_list: list[dict]):
    """
    Collect every ``*.ddl`` file under *ddl_dir* and return its content with
    variables substituted.

    File names must follow ``db__table.ddl``; each result entry is keyed by
    ``DB.TABLE`` (upper-cased).

    :param ddl_dir: root directory, layout ddl/db/xx.ddl
    :param var_list: variables, [{'var_name': .., 'var_value': .., 'var_comment': ..}]
    :return: list of single-entry dicts {"DB.TABLE": ddl_content}
    :raises Exception: if a DDL file cannot be decoded by any candidate encoding
    """
    file_list = []
    # Walk the tree and collect all .ddl files.
    for root, dirs, files in os.walk(ddl_dir):
        for file in files:
            if file.endswith(".ddl"):
                file_list.append(os.path.join(root, file))

    print(f"DDL文件数量:{len(file_list)}")
    result = []
    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252', 'gbk', 'gb2312']
    for file in file_list:
        fp = Path(file)
        # Naming convention db__table.ddl -> database and table name.
        # partition() splits on the FIRST "__" and tolerates extra "__" in the
        # table part; the original arr[1] crashed on names without "__".
        stem = fp.name[:-len(".ddl")]
        db, sep, table = stem.partition("__")
        if not sep:  # malformed file name: no "__" separator — skip it
            print(f"忽略不符合命名规则的文件:{file}")
            continue
        key = f"{db.upper()}.{table.upper()}"
        for encoding in encodings:
            try:
                with open(file, 'r', encoding=encoding) as f:
                    content = f.read()
                for var in var_list:
                    content = content.replace(var["var_name"], var["var_value"])
                result.append({key: content})
                break
            except UnicodeDecodeError:
                print(f"文件{fp.name} 编码 {encoding} 不支持")
                continue
        else:
            print("无法找到合适的编码")
            raise Exception(f"无法读取文件 {file}, 未找到合适的编码.")

    if result:  # guard: the original printed result[0] and crashed on empty dirs
        print(f"result:{result[0]}")
    return result
+
+
+if __name__ == "__main__":
+    import sys
+    import os
+    from pathlib import Path
+    import json
+
+    # 参数检查
+    if len(sys.argv) <= 4:
+        print(f"usage python sql_lineage_agent_xmgj.py var_file sql_file dialect owner")
+        exit(1)
+    # 变量文件
+    var_file = sys.argv[1]
+    # sql脚本文件或目录
+    sql_file = sys.argv[2]
+    # 数据库方言
+    dialect = sys.argv[3]
+    # 属主
+    owner = sys.argv[4]
+
+    print(f"1、解析SQL文件/目录:{sql_file}")
+    print(f"2、SQL文件属主平台:{owner}")
+    print(f"3、变量文件:{var_file}")
+    print(f"4、SQL数据库方言: {dialect}")
+
+    # 检查变量文件是否存在
+    var_fp = Path(var_file)
+    if not var_fp.exists():
+        raise FileNotFoundError(f"变量文件 {var_file} 不存在,请指定正确路径.")
+
+    # 检查SQL脚本文件是否存在
+    sql_fp = Path(sql_file)
+    if not sql_fp.exists():
+        raise FileNotFoundError(f"SQL脚本文件 {sql_file} 不存在,请指定正确路径.")
+
+    # 读取变量文件,获取变量值
+    var_list = read_var_file(var_file)
+
+    # 读取SQL文件, 替换变量
+    sql_content = replace_vars(sql_file=sql_file, var_list=var_list)
+
+    # 解析SQL血缘
+    result = asyncio.run(main(sql_content=sql_content, sql_file=sql_fp.name, dialect=dialect, owner=owner))
+    # 将结果写入同级目录下,文件后缀为.json
+    target_file = Path(sql_fp.parent.absolute() / (sql_fp.name + ".json"))
+    print(f"写目标文件:{target_file}")
+    # 写文件
+    with open(target_file, 'w', encoding="utf-8") as t:
+        t.write(json.dumps(result, ensure_ascii=False, indent=2))
+

+ 56 - 0
llmops/agents/datadev/llm.py

@@ -0,0 +1,56 @@
+
+from langchain_openai import ChatOpenAI
+from config import llm_config
+
# Shared ChatOpenAI instance for the data-dev agents (lazily created by init_llm).
global_llm: ChatOpenAI = None
+
+
# Initialize the shared model (idempotent).
def init_llm() -> ChatOpenAI:
    """Create the shared ChatOpenAI instance on first call and return it."""
    # Model settings come from the file-level llm_config.
    c = llm_config

    global global_llm
    if global_llm is None:
        # Build the streaming chat model from the configuration.
        global_llm = ChatOpenAI(
            model=c["model"],
            base_url=c["base_url"],
            api_key=c["api_key"],
            temperature=c["temperature"],
            streaming=True,
        )
    return global_llm
+
+# 获取大模型
+def get_llm() -> ChatOpenAI:
+    if global_llm is None:
+        init_llm()
+        # raise ValueError("请先调用 initllm(config)方法初始化llm模型")
+    return global_llm
+
+
+# 获取大模型
+def get_llm_coder() -> ChatOpenAI:
+    c = llm_config
+    return ChatOpenAI(
+        model=c["coder_model"],
+        base_url=c["base_url"],
+        api_key=c["api_key"],
+        temperature=c["temperature"],
+        streaming=True
+    )
+
if __name__ == '__main__':
    # Smoke test: create the shared model and send a single prompt.
    llm = get_llm()
    # Fix: the original used print(f"llm:", llm) — an f-string with no
    # placeholders (lint F541); output is unchanged.
    print("llm:", llm)

    question = "你好"
    response = llm.invoke(question)
    print(response)

+ 23 - 0
llmops/agents/datadev/memory/LimitedConversationBufferMemory.py

@@ -0,0 +1,23 @@
+
+
+from langchain.memory import ConversationBufferMemory
+from typing import Dict, Any
+from pydantic import Field
+
class LimitedConversationBufferMemory(ConversationBufferMemory):
    """Conversation buffer memory capped at a maximum number of messages."""

    # Maximum number of messages kept in the buffer.
    max_size: int = Field(default=10, description="缓存大小")

    def __init__(self, max_size: int = 500, **kwargs: Any):
        super().__init__(**kwargs)
        self.max_size = max_size

    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        """Save the turn via the parent, then drop the oldest messages beyond max_size."""
        super().save_context(inputs, outputs)
        messages = self.chat_memory.messages
        while len(messages) > self.max_size:
            messages.pop(0)

+ 0 - 0
llmops/agents/datadev/memory/__init__.py


+ 139 - 0
llmops/agents/datadev/memory/memory_saver_with_expiry.py

@@ -0,0 +1,139 @@
+
+from langchain_core.runnables import RunnableConfig
+from langgraph.checkpoint.base import Checkpoint, CheckpointMetadata, ChannelVersions, BaseCheckpointSaver, \
+    CheckpointTuple
+from datetime import datetime, timedelta
+from typing import Optional, Dict, Sequence, Tuple, Any
+from collections.abc import Iterator
+import threading
+
class MemorySaverWithExpiry(BaseCheckpointSaver):
    """
    In-memory checkpoint saver whose entries expire automatically.

    A daemon thread purges checkpoints whose last write is older than
    ``expire_time``; expired entries are also dropped lazily on ``get``.
    """

    def __init__(self, expire_time: int = 3600, clean_interval: int = 300):
        """
        :param expire_time: seconds after which a checkpoint expires
        :param clean_interval: seconds between background cleanup sweeps
        """
        super().__init__()
        self._checkpoints: Dict[str, Checkpoint] = {}   # thread_id -> checkpoint
        self._thread_ts: Dict[str, datetime] = {}       # thread_id -> last write time
        self.expire_time = timedelta(seconds=expire_time)
        self.clean_interval = clean_interval
        self._lock = threading.RLock()

        # Background cleanup thread (daemon: exits with the process).
        self.clean_thread = threading.Thread(target=self._background_clean, daemon=True)
        self.clean_thread.start()

    @staticmethod
    def get_thread_id(config: RunnableConfig) -> str:
        """Extract 'configurable.thread_id' from *config* or raise ValueError."""
        try:
            return str(config["configurable"]["thread_id"])
        except KeyError:
            raise ValueError("RunnableConfig must contain 'configurable.thread_id'")

    def _background_clean(self):
        """Periodically delete every checkpoint older than expire_time."""
        wait_event = threading.Event()  # hoisted: no per-sweep allocation
        while True:
            wait_event.wait(timeout=self.clean_interval)
            with self._lock:
                expired = [
                    thread_id
                    for thread_id, ts in self._thread_ts.items()
                    if (datetime.now() - ts) > self.expire_time
                ]
                for thread_id in expired:
                    self._delete(thread_id)

    def _is_expired(self, thread_id: str):
        """Return True when *thread_id* is unknown or past its expiry window."""
        ts = self._thread_ts.get(thread_id)
        return ts is None or (datetime.now() - ts) > self.expire_time

    def _delete(self, thread_id: str):
        """Remove the checkpoint and timestamp for *thread_id* (if present)."""
        self._thread_ts.pop(thread_id, None)
        self._checkpoints.pop(thread_id, None)

    def put(
        self,
        config: RunnableConfig,
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: ChannelVersions,
    ) -> RunnableConfig:
        """Store *checkpoint* for the config's thread and refresh its timestamp."""
        thread_id = self.get_thread_id(config)
        with self._lock:
            self._thread_ts[thread_id] = datetime.now()
            self._checkpoints[thread_id] = checkpoint

        return config

    def get(self, config: RunnableConfig) -> Optional[Checkpoint]:
        """Return the stored checkpoint, purging (and returning None for) expired entries."""
        thread_id = self.get_thread_id(config=config)
        with self._lock:
            if self._is_expired(thread_id):
                print("clear thread_id:", thread_id)
                self._delete(thread_id)
                return None
            # Fix: the original read ``self.checkpoints`` (missing underscore),
            # which raised AttributeError on every non-expired lookup.
            return self._checkpoints.get(thread_id)

    def list(
        self,
        config: Optional[RunnableConfig],
        *,
        filter: Optional[Dict[str, Any]] = None,  # fix: was builtin ``any``, not typing.Any
        before: Optional[RunnableConfig] = None,
        limit: Optional[int] = None,
    ) -> Iterator[CheckpointTuple]:
        return super().list(config=config)

    def get_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]:
        """Return a CheckpointTuple for the config's thread, or None."""
        checkpoint = self.get(config)
        if checkpoint:
            # Fix: the original referenced self._timestamps and
            # self._get_thread_id, neither of which exists on this class.
            return CheckpointTuple(
                config=config,
                checkpoint=checkpoint,
                metadata=CheckpointMetadata(
                    source="memory",
                    timestamp=self._thread_ts[self.get_thread_id(config)],
                )
            )
        return None

    def put_writes(
        self,
        config: RunnableConfig,
        writes: Sequence[Tuple[str, Any]],
        task_id: str,
        task_path: str = "",
    ) -> None:
        # Intermediate (pending) writes are not persisted by this saver.
        # Fix: the original left this method with no executable body (only
        # comments after the signature), which is a SyntaxError for the module.
        pass

+ 137 - 0
llmops/agents/datadev/memory/memory_saver_with_expiry2.py

@@ -0,0 +1,137 @@
+
+from langchain_core.runnables import RunnableConfig
+from langgraph.checkpoint.base import Checkpoint, CheckpointMetadata, ChannelVersions, BaseCheckpointSaver, \
+    CheckpointTuple
+from datetime import datetime, timedelta
+from typing import Optional, Dict, Sequence, Tuple, Any
+from collections.abc import Iterator
+import threading
+
+from langgraph.checkpoint.memory import MemorySaver
+
+
class MemorySaverWithExpiry(MemorySaver):
    """In-memory checkpoint saver whose entries expire automatically.

    Entries are removed either lazily on access or by a daemon thread that
    sweeps every `clean_interval` seconds.
    """

    def __init__(self, expire_time: int = 3600, clean_interval: int = 300):
        """Create the saver and start the background cleanup thread.

        Args:
            expire_time: seconds after the last `put` before a thread's
                checkpoint is considered expired.
            clean_interval: seconds between background sweeps.
        """
        super().__init__()
        self._checkpoints: Dict[str, Checkpoint] = {}
        self._thread_ts: Dict[str, datetime] = {}
        self._metadata: Dict[str, CheckpointMetadata] = {}
        self.expire_time = timedelta(seconds=expire_time)
        self.clean_interval = clean_interval
        self._lock = threading.RLock()
        # Fix: reuse a single Event for the sweep wait; the original allocated
        # a fresh throwaway threading.Event() on every loop iteration.
        self._stop_event = threading.Event()

        # Daemon thread: dies with the process, no explicit shutdown required.
        self.clean_thread = threading.Thread(target=self._background_clean, daemon=True)
        self.clean_thread.start()

    @staticmethod
    def get_thread_id(config: RunnableConfig) -> str:
        """Extract `configurable.thread_id` from the config, or raise ValueError."""
        try:
            return str(config["configurable"]["thread_id"])
        except KeyError:
            raise ValueError("RunnableConfig must contain 'configurable.thread_id'")

    def _background_clean(self) -> None:
        """Periodically delete every expired thread's state (daemon thread body)."""
        while not self._stop_event.wait(timeout=self.clean_interval):
            with self._lock:
                expired = [
                    thread_id
                    for thread_id, ts in self._thread_ts.items()
                    if (datetime.now() - ts) > self.expire_time
                ]
                for thread_id in expired:
                    self._delete(thread_id)

    def _is_expired(self, thread_id: str) -> bool:
        """True when the thread has no timestamp or its last write is too old."""
        ts = self._thread_ts.get(thread_id)
        return ts is None or (datetime.now() - ts) > self.expire_time

    def _delete(self, thread_id: str) -> None:
        """Remove all state for `thread_id`. Caller must hold `self._lock`."""
        self._thread_ts.pop(thread_id, None)
        self._checkpoints.pop(thread_id, None)
        self._metadata.pop(thread_id, None)

    def put(
        self,
        config: RunnableConfig,
        checkpoint: Checkpoint,
        metadata: CheckpointMetadata,
        new_versions: ChannelVersions,
    ) -> RunnableConfig:
        """Store the checkpoint and metadata, refreshing the thread's timestamp."""
        thread_id = self.get_thread_id(config)
        with self._lock:
            self._thread_ts[thread_id] = datetime.now()
            self._checkpoints[thread_id] = checkpoint
            self._metadata[thread_id] = metadata

        return config

    def get(self, config: RunnableConfig) -> Optional[Checkpoint]:
        """Return the thread's checkpoint, deleting it first if it has expired."""
        thread_id = self.get_thread_id(config=config)
        with self._lock:
            if self._is_expired(thread_id):
                self._delete(thread_id)
                return None
            # Fix: read inside the lock; the original released the lock before
            # the read, racing the background sweeper.
            return self._checkpoints.get(thread_id)

    def list(
        self,
        config: Optional[RunnableConfig],
        *,
        # Bug fix: annotation used the builtin `any` instead of `typing.Any`
        # (Any is already imported at the top of this module).
        filter: Optional[Dict[str, Any]] = None,
        before: Optional[RunnableConfig] = None,
        limit: Optional[int] = None,
    ) -> Iterator[CheckpointTuple]:
        """Delegate to MemorySaver.list; extra filters are accepted, not forwarded."""
        return super().list(config=config)

    def get_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]:
        """Return the (config, checkpoint, metadata) tuple, or None if absent."""
        thread_id = self.get_thread_id(config)
        with self._lock:
            # Fix: the original read `_metadata` and `_thread_ts` after
            # releasing the lock, racing the background sweeper; take all
            # three reads under one critical section.
            checkpoint = self._checkpoints.get(thread_id)
            if checkpoint is None:
                return None
            metadata = self._metadata.get(thread_id, CheckpointMetadata())
            ts = self._thread_ts.get(thread_id)
        if ts is None:
            return None
        return CheckpointTuple(
            config=config,
            checkpoint=checkpoint,
            metadata=CheckpointMetadata(
                source="memory",
                timestamp=ts,
                step=metadata.get("step", 0),
            ),
        )

+ 0 - 0
llmops/agents/datadev/tools/__init__.py


+ 22 - 0
llmops/agents/datadev/tools/del_substr_tool.py

@@ -0,0 +1,22 @@
+
+
+
+from langchain.agents.tools import BaseTool
+from pydantic import BaseModel, Field
+from typing import Type
+
class DelSubstrToolInput(BaseModel):
    """Argument schema for DelSubstrTool: the source text and the substring to remove."""
    text: str = Field(description="原字符串")
    content: str = Field(description="去除的内容")
+
+
class DelSubstrTool(BaseTool):
    """Tool that removes every occurrence of a given substring from a string."""

    # Tool name exposed to the agent.
    name: str = "remove_str"

    # Tool description exposed to the agent.
    description: str = (
        "去除字符串的指定的内容"
    )
    args_schema: Type[BaseModel] = DelSubstrToolInput

    def _run(self, text: str, content: str) -> str:
        # Bug fix: the original called text.replace(r"{content}", ""), which
        # removes the literal text "{content}" -- the raw string is not an
        # f-string -- instead of the `content` argument.
        return text.replace(content, "")

+ 21 - 0
llmops/agents/datadev/tools/gen_query_sql_tool.py

@@ -0,0 +1,21 @@
+
+
+
+from langchain.agents.tools import BaseTool
+from pydantic import BaseModel, Field
+from typing import Type
+
class GenQuerySqlToolInput(BaseModel):
    """Argument schema for GenQuerySqlTool: query description, table schema, dialect."""
    query: str = Field(description="查询的文字描述")
    table_schema: str = Field(description="表schma信息说明")
    dialect: str = Field(default="hive", description="数据库方言")
+
+
class GenQuerySqlTool(BaseTool):
    """Tool that generates a query statement from a description plus a table schema."""

    # Consistency fix: annotate the fields with `str`, matching the other
    # BaseTool subclasses in this package (pydantic-v2 BaseTool requires
    # annotated field overrides).
    name: str = "gen_query_sql"
    description: str = (
        "根据用户描述和表schema信息,生成查询语句"
    )
    args_schema: Type[BaseModel] = GenQuerySqlToolInput

    def _run(self, query: str, table_schema: str, dialect: str) -> str:
        # Bug fix: the original `_run` had no body at all, which is a
        # SyntaxError and broke import of this module. SQL generation is
        # presumably delegated to an LLM; until that is wired in, fail loudly
        # instead of silently returning None.
        raise NotImplementedError("GenQuerySqlTool._run is not implemented yet")

+ 56 - 0
llmops/agents/datadev/tools/get_database_schema_tool.py

@@ -0,0 +1,56 @@
+
+
+
+from pydantic import BaseModel, Field
+from langchain.agents.tools import BaseTool
+from typing import Type
+
+from llmops.agents.datadev.rag.schema_handler import SchemaHandler
+
class GetDatabaseSchemaToolInput(BaseModel):
    """
    Argument schema for GetDatabaseSchemaTool.
    """
    query: str = Field(description="检索文本")
+
class GetDatabaseSchemaTool(BaseTool):
    """Retrieve database schema definitions from the knowledge base.

    Input is a (comma-separated) search string; output is a JSON-array string,
    or "" on any retrieval failure.
    """

    # Tool name exposed to the agent.
    name: str = "get_database_schema"

    # Tool description. Bug fix: the original assigned a *tuple* of strings
    # (trailing commas inside the parentheses) to a field annotated `str`,
    # which fails pydantic validation; concatenate the fragments instead.
    description: str = (
        "根据用户问题,从知识库中检索数据库schema定义信息,"
        "输入是查找的文本字符串,"
        "返回的形式是json数组"
    )

    # Tool argument schema.
    args_schema: Type[BaseModel] = GetDatabaseSchemaToolInput

    # Schema retriever. Bug fix: the annotation was Type[SchemaHandler], but
    # the default value is an *instance*, not the class.
    schema_handler: SchemaHandler = SchemaHandler()

    def _run(self, query: str) -> str:
        """Run the retrieval; swallow errors and return "" so the agent keeps going."""
        try:
            result = self.query(query)
            print("schema=====:", result)
        except Exception as e:
            print(f"调用工具出现异常{str(e)}")
            result = ""

        return result

    def _arun(self, query: str):
        # Bug fix: the parameter was `query_list: list[str]`, contradicting
        # args_schema (a single string); async simply defers to the sync path.
        return self._run(query)

    def query(self, query: str):
        """Split the comma-separated query and run a multi-similarity search."""
        result = self.schema_handler.query_mulsimilar(query_list=query.split(","), top_k=10, similarity_threshold=0.45)
        return result
+
+
if __name__ == '__main__':
    # Bug fix: the original instantiated `GetTableSchemaTool`, a name that does
    # not exist anywhere in this module (NameError at runtime); the class
    # defined above is GetDatabaseSchemaTool.
    tool = GetDatabaseSchemaTool()
    result = tool.run("查询机构编号是100的机构信息")
    print(result)

+ 163 - 0
llmops/agents/datadev/tools/logger_decorator.py

@@ -0,0 +1,163 @@
+
+from functools import wraps
+import time
+from time import perf_counter
+import logging
+from logging.handlers import RotatingFileHandler
+from typing import Callable, Any
+
+
# Build (once) and return an application logger: rotating file, optional console echo.
def configure_logger(
    logger_name: str = "app",
    log_level: int = logging.INFO,
    log_file: str = "app.log",
    max_bytes: int = 10 * 1024 * 1024,  # 10MB
    backup_count: int = 5,
    console_output: bool = False
) -> logging.Logger:
    """Configure and return the named application logger.

    Args:
        logger_name: name of the logger to configure.
        log_level: level set on the logger itself.
        log_file: path of the rotating log file.
        max_bytes: maximum size of a single log file before rotation.
        backup_count: number of rotated backup files to keep.
        console_output: also echo WARNING+ records to the console.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(log_level)

    # Already configured earlier in the process -- don't stack duplicate handlers.
    if logger.handlers:
        return logger

    # Rotating file sink.
    rotating = RotatingFileHandler(
        filename=log_file,
        maxBytes=max_bytes,
        backupCount=backup_count,
        encoding="utf-8",
        delay=True  # open the file lazily, on first actual write
    )
    rotating.setFormatter(
        logging.Formatter(
            "[%(asctime)s] %(levelname)-8s %(module)s:%(lineno)d - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
        )
    )
    logger.addHandler(rotating)

    # Optional console sink, restricted to warnings and above.
    if console_output:
        echo = logging.StreamHandler()
        echo.setLevel(logging.WARNING)
        echo.setFormatter(logging.Formatter("%(levelname)-8s %(message)s"))
        logger.addHandler(echo)

    return logger
+
def _safe_repr(value: Any, limit: int) -> str:
    """repr() truncated to `limit` chars, with a '...' marker when cut."""
    text = repr(value)
    return text[:limit] + "..." if len(text) > limit else text


def log(
    logger_name: str = "agent",
    log_file: str = "app.log",
    level: int = logging.INFO,
    log_args: bool = True,
    log_return: bool = True,
    max_arg_length: int = 500,
    max_result_length: int = 500,
    log_exceptions: bool = True,
    enable: bool = True
) -> Callable:
    """Logging decorator: records a function's arguments, return value,
    duration and failures.

    Args:
        logger_name: logger to write to (configured on first use).
        log_file: file used if the logger must be configured here.
        level: level for call/return records.
        log_args: record the call arguments.
        log_return: record the return value.
        max_arg_length: per-argument repr length cap.
        max_result_length: result repr length cap.
        log_exceptions: record raised exceptions (at ERROR).
        enable: master switch; when False the function is called directly.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs) -> Any:
            # Fast path: logging disabled entirely.
            if not enable:
                return func(*args, **kwargs)

            logger = logging.getLogger(logger_name)
            # Lazily configure the logger on first use.
            if not logger.handlers:
                configure_logger(logger_name, level, log_file)

            # Qualified name, module-prefixed unless running as a script.
            func_name = func.__qualname__
            if func.__module__ != "__main__":
                func_name = f"{func.__module__}.{func_name}"

            # Argument rendering. Perf fix: the original computed repr() twice
            # per argument (once to slice, once to measure); _safe_repr does it
            # once. The dead `args_repr = []` pre-initialisation is gone.
            if log_args:
                try:
                    rendered = [_safe_repr(arg, max_arg_length) for arg in args]
                    rendered += [
                        f"{k}={_safe_repr(v, max_arg_length)}"
                        for k, v in kwargs.items()
                    ]
                    all_args = ", ".join(rendered)
                except Exception as e:
                    all_args = f"[参数序列化失败: {str(e)}]"
                    logger.warning("参数记录失败", exc_info=True)
            else:
                all_args = "..."

            start_time = perf_counter()
            logger.log(level, f"调用函数 {func_name}({all_args})")

            try:
                result = func(*args, **kwargs)
            except Exception as e:
                duration = perf_counter() - start_time
                if log_exceptions:
                    logger.error(
                        f"函数执行失败 {func_name} [耗时: {duration:.4f}s] | "
                        f"异常类型: {type(e).__name__}, 异常信息: {str(e)}",
                        exc_info=logger.isEnabledFor(logging.DEBUG)
                    )
                raise

            duration = perf_counter() - start_time

            # Result rendering (subject to the same truncation rules).
            return_repr = ""
            if log_return:
                try:
                    return_repr = _safe_repr(result, max_result_length)
                except Exception as e:
                    return_repr = f"[结果序列化失败: {str(e)}]"
                    logger.warning("结果记录失败", exc_info=True)

            logger.log(
                level,
                f"成功执行函数 {func_name} [耗时: {duration:.4f}s] → {return_repr}"
            )
            return result
        return wrapper
    return decorator

+ 14 - 0
llmops/agents/datadev/tools/timeit.py

@@ -0,0 +1,14 @@
+
+from functools import wraps
+import time
+
# Decorator that reports a function's wall-clock execution time.
# (The original comment said "run-count decorator", but this measures time.)
def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        begin = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - begin
        print(f"函数 {func.__name__} 耗时: {elapsed:.4f}秒")
        return outcome
    return wrapper

+ 1 - 0
venv/.Python

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/Python

+ 247 - 0
venv/bin/Activate.ps1

@@ -0,0 +1,247 @@
+<#
+.Synopsis
+Activate a Python virtual environment for the current PowerShell session.
+
+.Description
+Pushes the python executable for a virtual environment to the front of the
+$Env:PATH environment variable and sets the prompt to signify that you are
+in a Python virtual environment. Makes use of the command line switches as
+well as the `pyvenv.cfg` file values present in the virtual environment.
+
+.Parameter VenvDir
+Path to the directory that contains the virtual environment to activate. The
+default value for this is the parent of the directory that the Activate.ps1
+script is located within.
+
+.Parameter Prompt
+The prompt prefix to display when this virtual environment is activated. By
+default, this prompt is the name of the virtual environment folder (VenvDir)
+surrounded by parentheses and followed by a single space (ie. '(.venv) ').
+
+.Example
+Activate.ps1
+Activates the Python virtual environment that contains the Activate.ps1 script.
+
+.Example
+Activate.ps1 -Verbose
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and shows extra information about the activation as it executes.
+
+.Example
+Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
+Activates the Python virtual environment located in the specified location.
+
+.Example
+Activate.ps1 -Prompt "MyPython"
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and prefixes the current prompt with the specified string (surrounded in
+parentheses) while the virtual environment is active.
+
+.Notes
+On Windows, it may be required to enable this Activate.ps1 script by setting the
+execution policy for the user. You can do this by issuing the following PowerShell
+command:
+
+PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+
+For more information on Execution Policies: 
+https://go.microsoft.com/fwlink/?LinkID=135170
+
+#>
+Param(
+    [Parameter(Mandatory = $false)]
+    [String]
+    $VenvDir,
+    [Parameter(Mandatory = $false)]
+    [String]
+    $Prompt
+)
+
+<# Function declarations --------------------------------------------------- #>
+
+<#
+.Synopsis
+Remove all shell session elements added by the Activate script, including the
+addition of the virtual environment's Python executable from the beginning of
+the PATH variable.
+
+.Parameter NonDestructive
+If present, do not remove this function from the global namespace for the
+session.
+
+#>
+function global:deactivate ([switch]$NonDestructive) {
+    # Revert to original values
+
+    # The prior prompt:
+    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
+        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
+        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
+    }
+
+    # The prior PYTHONHOME:
+    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
+        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
+        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
+    }
+
+    # The prior PATH:
+    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
+        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
+        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
+    }
+
+    # Just remove the VIRTUAL_ENV altogether:
+    if (Test-Path -Path Env:VIRTUAL_ENV) {
+        Remove-Item -Path env:VIRTUAL_ENV
+    }
+
+    # Just remove VIRTUAL_ENV_PROMPT altogether.
+    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
+        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
+    }
+
+    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
+    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
+        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
+    }
+
+    # Leave deactivate function in the global namespace if requested:
+    if (-not $NonDestructive) {
+        Remove-Item -Path function:deactivate
+    }
+}
+
+<#
+.Description
+Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
+given folder, and returns them in a map.
+
+For each line in the pyvenv.cfg file, if that line can be parsed into exactly
+two strings separated by `=` (with any amount of whitespace surrounding the =)
+then it is considered a `key = value` line. The left hand string is the key,
+the right hand is the value.
+
+If the value starts with a `'` or a `"` then the first and last character is
+stripped from the value before being captured.
+
+.Parameter ConfigDir
+Path to the directory that contains the `pyvenv.cfg` file.
+#>
+function Get-PyVenvConfig(
+    [String]
+    $ConfigDir
+) {
+    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
+
+    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
+    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
+
+    # An empty map will be returned if no config file is found.
+    $pyvenvConfig = @{ }
+
+    if ($pyvenvConfigPath) {
+
+        Write-Verbose "File exists, parse `key = value` lines"
+        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
+
+        $pyvenvConfigContent | ForEach-Object {
+            $keyval = $PSItem -split "\s*=\s*", 2
+            if ($keyval[0] -and $keyval[1]) {
+                $val = $keyval[1]
+
+                # Remove extraneous quotations around a string value.
+                if ("'""".Contains($val.Substring(0, 1))) {
+                    $val = $val.Substring(1, $val.Length - 2)
+                }
+
+                $pyvenvConfig[$keyval[0]] = $val
+                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
+            }
+        }
+    }
+    return $pyvenvConfig
+}
+
+
+<# Begin Activate script --------------------------------------------------- #>
+
+# Determine the containing directory of this script
+$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
+$VenvExecDir = Get-Item -Path $VenvExecPath
+
+Write-Verbose "Activation script is located in path: '$VenvExecPath'"
+Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
+Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
+
+# Set values required in priority: CmdLine, ConfigFile, Default
+# First, get the location of the virtual environment, it might not be
+# VenvExecDir if specified on the command line.
+if ($VenvDir) {
+    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
+}
+else {
+    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
+    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
+    Write-Verbose "VenvDir=$VenvDir"
+}
+
+# Next, read the `pyvenv.cfg` file to determine any required value such
+# as `prompt`.
+$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
+
+# Next, set the prompt from the command line, or the config file, or
+# just use the name of the virtual environment folder.
+if ($Prompt) {
+    Write-Verbose "Prompt specified as argument, using '$Prompt'"
+}
+else {
+    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
+    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
+        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
+        $Prompt = $pyvenvCfg['prompt'];
+    }
+    else {
+        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
+        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
+        $Prompt = Split-Path -Path $venvDir -Leaf
+    }
+}
+
+Write-Verbose "Prompt = '$Prompt'"
+Write-Verbose "VenvDir='$VenvDir'"
+
+# Deactivate any currently active virtual environment, but leave the
+# deactivate function in place.
+deactivate -nondestructive
+
+# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
+# that there is an activated venv.
+$env:VIRTUAL_ENV = $VenvDir
+
+if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
+
+    Write-Verbose "Setting prompt to '$Prompt'"
+
+    # Set the prompt to include the env name
+    # Make sure _OLD_VIRTUAL_PROMPT is global
+    function global:_OLD_VIRTUAL_PROMPT { "" }
+    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
+    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
+
+    function global:prompt {
+        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
+        _OLD_VIRTUAL_PROMPT
+    }
+    $env:VIRTUAL_ENV_PROMPT = $Prompt
+}
+
+# Clear PYTHONHOME
+if (Test-Path -Path Env:PYTHONHOME) {
+    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
+    Remove-Item -Path Env:PYTHONHOME
+}
+
+# Add the venv to the PATH
+Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
+$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"

+ 69 - 0
venv/bin/activate

@@ -0,0 +1,69 @@
+# This file must be used with "source bin/activate" *from bash*
+# you cannot run it directly
+
+deactivate () {
+    # reset old environment variables
+    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
+        PATH="${_OLD_VIRTUAL_PATH:-}"
+        export PATH
+        unset _OLD_VIRTUAL_PATH
+    fi
+    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
+        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
+        export PYTHONHOME
+        unset _OLD_VIRTUAL_PYTHONHOME
+    fi
+
+    # This should detect bash and zsh, which have a hash command that must
+    # be called to get it to forget past commands.  Without forgetting
+    # past commands the $PATH changes we made may not be respected
+    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+        hash -r 2> /dev/null
+    fi
+
+    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
+        PS1="${_OLD_VIRTUAL_PS1:-}"
+        export PS1
+        unset _OLD_VIRTUAL_PS1
+    fi
+
+    unset VIRTUAL_ENV
+    unset VIRTUAL_ENV_PROMPT
+    if [ ! "${1:-}" = "nondestructive" ] ; then
+    # Self destruct!
+        unset -f deactivate
+    fi
+}
+
+# unset irrelevant variables
+deactivate nondestructive
+
+VIRTUAL_ENV="/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv"
+export VIRTUAL_ENV
+
+_OLD_VIRTUAL_PATH="$PATH"
+PATH="$VIRTUAL_ENV/bin:$PATH"
+export PATH
+
+# unset PYTHONHOME if set
+# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
+# could use `if (set -u; : $PYTHONHOME) ;` in bash
+if [ -n "${PYTHONHOME:-}" ] ; then
+    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
+    unset PYTHONHOME
+fi
+
+if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
+    _OLD_VIRTUAL_PS1="${PS1:-}"
+    PS1="(venv) ${PS1:-}"
+    export PS1
+    VIRTUAL_ENV_PROMPT="(venv) "
+    export VIRTUAL_ENV_PROMPT
+fi
+
+# This should detect bash and zsh, which have a hash command that must
+# be called to get it to forget past commands.  Without forgetting
+# past commands the $PATH changes we made may not be respected
+if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
+    hash -r 2> /dev/null
+fi

+ 26 - 0
venv/bin/activate.csh

@@ -0,0 +1,26 @@
+# This file must be used with "source bin/activate.csh" *from csh*.
+# You cannot run it directly.
+# Created by Davide Di Blasi <davidedb@gmail.com>.
+# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
+
+alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
+
+# Unset irrelevant variables.
+deactivate nondestructive
+
+setenv VIRTUAL_ENV "/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv"
+
+set _OLD_VIRTUAL_PATH="$PATH"
+setenv PATH "$VIRTUAL_ENV/bin:$PATH"
+
+
+set _OLD_VIRTUAL_PROMPT="$prompt"
+
+if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
+    set prompt = "(venv) $prompt"
+    setenv VIRTUAL_ENV_PROMPT "(venv) "
+endif
+
+alias pydoc python -m pydoc
+
+rehash

+ 66 - 0
venv/bin/activate.fish

@@ -0,0 +1,66 @@
+# This file must be used with "source <venv>/bin/activate.fish" *from fish*
+# (https://fishshell.com/); you cannot run it directly.
+
+function deactivate  -d "Exit virtual environment and return to normal shell environment"
+    # reset old environment variables
+    if test -n "$_OLD_VIRTUAL_PATH"
+        set -gx PATH $_OLD_VIRTUAL_PATH
+        set -e _OLD_VIRTUAL_PATH
+    end
+    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
+        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
+        set -e _OLD_VIRTUAL_PYTHONHOME
+    end
+
+    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
+        functions -e fish_prompt
+        set -e _OLD_FISH_PROMPT_OVERRIDE
+        functions -c _old_fish_prompt fish_prompt
+        functions -e _old_fish_prompt
+    end
+
+    set -e VIRTUAL_ENV
+    set -e VIRTUAL_ENV_PROMPT
+    if test "$argv[1]" != "nondestructive"
+        # Self-destruct!
+        functions -e deactivate
+    end
+end
+
+# Unset irrelevant variables.
+deactivate nondestructive
+
+set -gx VIRTUAL_ENV "/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv"
+
+set -gx _OLD_VIRTUAL_PATH $PATH
+set -gx PATH "$VIRTUAL_ENV/bin" $PATH
+
+# Unset PYTHONHOME if set.
+if set -q PYTHONHOME
+    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
+    set -e PYTHONHOME
+end
+
+if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
+    # fish uses a function instead of an env var to generate the prompt.
+
+    # Save the current fish_prompt function as the function _old_fish_prompt.
+    functions -c fish_prompt _old_fish_prompt
+
+    # With the original prompt function renamed, we can override with our own.
+    function fish_prompt
+        # Save the return status of the last command.
+        set -l old_status $status
+
+        # Output the venv prompt; color taken from the blue of the Python logo.
+        printf "%s%s%s" (set_color 4B8BBE) "(venv) " (set_color normal)
+
+        # Restore the return status of the previous command.
+        echo "exit $old_status" | .
+        # Output the original/"old" prompt.
+        _old_fish_prompt
+    end
+
+    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
+    set -gx VIRTUAL_ENV_PROMPT "(venv) "
+end

+ 8 - 0
venv/bin/distro

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from distro.distro import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 8 - 0
venv/bin/httpx

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from httpx import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 41 - 0
venv/bin/jsondiff

@@ -0,0 +1,41 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function
+
+import sys
+import json
+import jsonpatch
+import argparse
+
+
+parser = argparse.ArgumentParser(description='Diff two JSON files')
+parser.add_argument('FILE1', type=argparse.FileType('r'))
+parser.add_argument('FILE2', type=argparse.FileType('r'))
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-u', '--preserve-unicode', action='store_true',
+                    help='Output Unicode character as-is without using Code Point')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpatch.__version__)
+
+
+def main():
+    try:
+        diff_files()
+    except KeyboardInterrupt:
+        sys.exit(1)
+
+
+def diff_files():
+    """ Diffs two JSON files and prints a patch """
+    args = parser.parse_args()
+    doc1 = json.load(args.FILE1)
+    doc2 = json.load(args.FILE2)
+    patch = jsonpatch.make_patch(doc1, doc2)
+    if patch.patch:
+        print(json.dumps(patch.patch, indent=args.indent, ensure_ascii=not(args.preserve_unicode)))
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()

+ 107 - 0
venv/bin/jsonpatch

@@ -0,0 +1,107 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+
+import sys
+import os.path
+import json
+import jsonpatch
+import tempfile
+import argparse
+
+
+parser = argparse.ArgumentParser(
+    description='Apply a JSON patch on a JSON file')
+parser.add_argument('ORIGINAL', type=argparse.FileType('r'),
+                    help='Original file')
+parser.add_argument('PATCH', type=argparse.FileType('r'),
+                    nargs='?', default=sys.stdin,
+                    help='Patch file (read from stdin if omitted)')
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-b', '--backup', action='store_true',
+                    help='Back up ORIGINAL if modifying in-place')
+parser.add_argument('-i', '--in-place', action='store_true',
+                    help='Modify ORIGINAL in-place instead of to stdout')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpatch.__version__)
+parser.add_argument('-u', '--preserve-unicode', action='store_true',
+                    help='Output Unicode character as-is without using Code Point')
+
+def main():
+    try:
+        patch_files()
+    except KeyboardInterrupt:
+        sys.exit(1)
+
+
+def patch_files():
+    """ Diffs two JSON files and prints a patch """
+    args = parser.parse_args()
+    doc = json.load(args.ORIGINAL)
+    patch = json.load(args.PATCH)
+    result = jsonpatch.apply_patch(doc, patch)
+
+    if args.in_place:
+        dirname = os.path.abspath(os.path.dirname(args.ORIGINAL.name))
+
+        try:
+            # Attempt to replace the file atomically.  We do this by
+            # creating a temporary file in the same directory as the
+            # original file so we can atomically move the new file over
+            # the original later.  (This is done in the same directory
+	    # because atomic renames do not work across mount points.)
+
+            fd, pathname = tempfile.mkstemp(dir=dirname)
+            fp = os.fdopen(fd, 'w')
+            atomic = True
+
+        except OSError:
+            # We failed to create the temporary file for an atomic
+            # replace, so fall back to non-atomic mode by backing up
+            # the original (if desired) and writing a new file.
+
+            if args.backup:
+                os.rename(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
+            fp = open(args.ORIGINAL.name, 'w')
+            atomic = False
+
+    else:
+        # Since we're not replacing the original file in-place, write
+        # the modified JSON to stdout instead.
+
+        fp = sys.stdout
+
+    # By this point we have some sort of file object we can write the 
+    # modified JSON to.
+    
+    json.dump(result, fp, indent=args.indent, ensure_ascii=not(args.preserve_unicode))
+    fp.write('\n')
+
+    if args.in_place:
+        # Close the new file.  If we aren't replacing atomically, this
+        # is our last step, since everything else is already in place.
+
+        fp.close()
+
+        if atomic:
+            try:
+                # Complete the atomic replace by linking the original
+                # to a backup (if desired), fixing up the permissions
+                # on the temporary file, and moving it into place.
+
+                if args.backup:
+                    os.link(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
+                os.chmod(pathname, os.stat(args.ORIGINAL.name).st_mode)
+                os.rename(pathname, args.ORIGINAL.name)
+
+            except OSError:
+                # In the event we could not actually do the atomic
+                # replace, unlink the original to move it out of the
+                # way and finally move the temporary file into place.
+                
+                os.unlink(args.ORIGINAL.name)
+                os.rename(pathname, args.ORIGINAL.name)
+
+
+if __name__ == "__main__":
+    main()

+ 67 - 0
venv/bin/jsonpointer

@@ -0,0 +1,67 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+
+
+import argparse
+import json
+import sys
+
+import jsonpointer
+
+parser = argparse.ArgumentParser(
+    description='Resolve a JSON pointer on JSON files')
+
+# Accept pointer as argument or as file
+ptr_group = parser.add_mutually_exclusive_group(required=True)
+
+ptr_group.add_argument('-f', '--pointer-file', type=argparse.FileType('r'),
+                       nargs='?',
+                       help='File containing a JSON pointer expression')
+
+ptr_group.add_argument('POINTER', type=str, nargs='?',
+                       help='A JSON pointer expression')
+
+parser.add_argument('FILE', type=argparse.FileType('r'), nargs='+',
+                    help='Files for which the pointer should be resolved')
+parser.add_argument('--indent', type=int, default=None,
+                    help='Indent output by n spaces')
+parser.add_argument('-v', '--version', action='version',
+                    version='%(prog)s ' + jsonpointer.__version__)
+
+
+def main():
+    try:
+        resolve_files()
+    except KeyboardInterrupt:
+        sys.exit(1)
+
+
+def parse_pointer(args):
+    if args.POINTER:
+        ptr = args.POINTER
+    elif args.pointer_file:
+        ptr = args.pointer_file.read().strip()
+    else:
+        parser.print_usage()
+        sys.exit(1)
+
+    return ptr
+
+
+def resolve_files():
+    """ Resolve a JSON pointer on JSON files """
+    args = parser.parse_args()
+
+    ptr = parse_pointer(args)
+
+    for f in args.FILE:
+        doc = json.load(f)
+        try:
+            result = jsonpointer.resolve_pointer(doc, ptr)
+            print(json.dumps(result, indent=args.indent))
+        except jsonpointer.JsonPointerException as e:
+            print('Could not resolve pointer: %s' % str(e), file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()

+ 8 - 0
venv/bin/normalizer

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from charset_normalizer.cli import cli_detect
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(cli_detect())

+ 8 - 0
venv/bin/openai

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from openai.cli import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 8 - 0
venv/bin/pip

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 8 - 0
venv/bin/pip3

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 8 - 0
venv/bin/pip3.10

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from pip._internal.cli.main import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 1 - 0
venv/bin/python

@@ -0,0 +1 @@
+python3.10

+ 1 - 0
venv/bin/python3

@@ -0,0 +1 @@
+python3.10

BIN
venv/bin/python3.10


+ 8 - 0
venv/bin/tqdm

@@ -0,0 +1,8 @@
+#!/Users/jiaqiang/PycharmProjects/tx_flow_analysis/venv/bin/python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+from tqdm.cli import main
+if __name__ == '__main__':
+    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
+    sys.exit(main())

+ 1 - 0
venv/include/python3.10

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/include/python3.10

+ 1 - 0
venv/lib/python3.10/LICENSE.txt

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/LICENSE.txt

+ 1 - 0
venv/lib/python3.10/__future__.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/__future__.py

+ 1 - 0
venv/lib/python3.10/_collections_abc.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/_collections_abc.py

+ 1 - 0
venv/lib/python3.10/_weakrefset.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/_weakrefset.py

+ 1 - 0
venv/lib/python3.10/abc.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/abc.py

+ 1 - 0
venv/lib/python3.10/base64.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/base64.py

+ 1 - 0
venv/lib/python3.10/bisect.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/bisect.py

+ 1 - 0
venv/lib/python3.10/codecs.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/codecs.py

+ 1 - 0
venv/lib/python3.10/collections

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/collections

+ 1 - 0
venv/lib/python3.10/config-3.10-darwin

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/config-3.10-darwin

+ 1 - 0
venv/lib/python3.10/copy.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/copy.py

+ 1 - 0
venv/lib/python3.10/copyreg.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/copyreg.py

+ 1 - 0
venv/lib/python3.10/encodings

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/encodings

+ 1 - 0
venv/lib/python3.10/enum.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/enum.py

+ 1 - 0
venv/lib/python3.10/fnmatch.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/fnmatch.py

+ 1 - 0
venv/lib/python3.10/functools.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/functools.py

+ 1 - 0
venv/lib/python3.10/genericpath.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/genericpath.py

+ 1 - 0
venv/lib/python3.10/hashlib.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/hashlib.py

+ 1 - 0
venv/lib/python3.10/heapq.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/heapq.py

+ 1 - 0
venv/lib/python3.10/hmac.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/hmac.py

+ 1 - 0
venv/lib/python3.10/imp.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/imp.py

+ 1 - 0
venv/lib/python3.10/importlib

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/importlib

+ 1 - 0
venv/lib/python3.10/io.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/io.py

+ 1 - 0
venv/lib/python3.10/keyword.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/keyword.py

+ 1 - 0
venv/lib/python3.10/lib-dynload

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/lib-dynload

+ 1 - 0
venv/lib/python3.10/linecache.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/linecache.py

+ 1 - 0
venv/lib/python3.10/locale.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/locale.py

+ 0 - 0
venv/lib/python3.10/no-global-site-packages.txt


+ 1 - 0
venv/lib/python3.10/ntpath.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/ntpath.py

+ 1 - 0
venv/lib/python3.10/operator.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/operator.py

+ 1 - 0
venv/lib/python3.10/orig-prefix.txt

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10

+ 1 - 0
venv/lib/python3.10/os.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/os.py

+ 1 - 0
venv/lib/python3.10/posixpath.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/posixpath.py

+ 1 - 0
venv/lib/python3.10/random.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/random.py

+ 1 - 0
venv/lib/python3.10/re.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/re.py

+ 1 - 0
venv/lib/python3.10/readline.so

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/lib-dynload/readline.cpython-310-darwin.so

+ 1 - 0
venv/lib/python3.10/reprlib.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/reprlib.py

+ 1 - 0
venv/lib/python3.10/rlcompleter.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/rlcompleter.py

+ 1 - 0
venv/lib/python3.10/shutil.py

@@ -0,0 +1 @@
+/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/shutil.py

BIN
venv/lib/python3.10/site-packages/__pycache__/jsonpatch.cpython-310.pyc


BIN
venv/lib/python3.10/site-packages/__pycache__/jsonpointer.cpython-310.pyc


BIN
venv/lib/python3.10/site-packages/__pycache__/typing_extensions.cpython-310.pyc


+ 128 - 0
venv/lib/python3.10/site-packages/_distutils_hack/__init__.py

@@ -0,0 +1,128 @@
+import sys
+import os
+import re
+import importlib
+import warnings
+
+
+is_pypy = '__pypy__' in sys.builtin_module_names
+
+
+warnings.filterwarnings('ignore',
+                        r'.+ distutils\b.+ deprecated',
+                        DeprecationWarning)
+
+
+def warn_distutils_present():
+    if 'distutils' not in sys.modules:
+        return
+    if is_pypy and sys.version_info < (3, 7):
+        # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
+        # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
+        return
+    warnings.warn(
+        "Distutils was imported before Setuptools, but importing Setuptools "
+        "also replaces the `distutils` module in `sys.modules`. This may lead "
+        "to undesirable behaviors or errors. To avoid these issues, avoid "
+        "using distutils directly, ensure that setuptools is installed in the "
+        "traditional way (e.g. not an editable install), and/or make sure "
+        "that setuptools is always imported before distutils.")
+
+
+def clear_distutils():
+    if 'distutils' not in sys.modules:
+        return
+    warnings.warn("Setuptools is replacing distutils.")
+    mods = [name for name in sys.modules if re.match(r'distutils\b', name)]
+    for name in mods:
+        del sys.modules[name]
+
+
+def enabled():
+    """
+    Allow selection of distutils by environment variable.
+    """
+    which = os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'stdlib')
+    return which == 'local'
+
+
+def ensure_local_distutils():
+    clear_distutils()
+    distutils = importlib.import_module('setuptools._distutils')
+    distutils.__name__ = 'distutils'
+    sys.modules['distutils'] = distutils
+
+    # sanity check that submodules load as expected
+    core = importlib.import_module('distutils.core')
+    assert '_distutils' in core.__file__, core.__file__
+
+
+def do_override():
+    """
+    Ensure that the local copy of distutils is preferred over stdlib.
+
+    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
+    for more motivation.
+    """
+    if enabled():
+        warn_distutils_present()
+        ensure_local_distutils()
+
+
+class DistutilsMetaFinder:
+    def find_spec(self, fullname, path, target=None):
+        if path is not None:
+            return
+
+        method_name = 'spec_for_{fullname}'.format(**locals())
+        method = getattr(self, method_name, lambda: None)
+        return method()
+
+    def spec_for_distutils(self):
+        import importlib.abc
+        import importlib.util
+
+        class DistutilsLoader(importlib.abc.Loader):
+
+            def create_module(self, spec):
+                return importlib.import_module('setuptools._distutils')
+
+            def exec_module(self, module):
+                pass
+
+        return importlib.util.spec_from_loader('distutils', DistutilsLoader())
+
+    def spec_for_pip(self):
+        """
+        Ensure stdlib distutils when running under pip.
+        See pypa/pip#8761 for rationale.
+        """
+        if self.pip_imported_during_build():
+            return
+        clear_distutils()
+        self.spec_for_distutils = lambda: None
+
+    @staticmethod
+    def pip_imported_during_build():
+        """
+        Detect if pip is being imported in a build script. Ref #2355.
+        """
+        import traceback
+        return any(
+            frame.f_globals['__file__'].endswith('setup.py')
+            for frame, line in traceback.walk_stack(None)
+        )
+
+
+DISTUTILS_FINDER = DistutilsMetaFinder()
+
+
+def add_shim():
+    sys.meta_path.insert(0, DISTUTILS_FINDER)
+
+
+def remove_shim():
+    try:
+        sys.meta_path.remove(DISTUTILS_FINDER)
+    except ValueError:
+        pass

BIN
venv/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc


BIN
venv/lib/python3.10/site-packages/_distutils_hack/__pycache__/override.cpython-310.pyc


+ 1 - 0
venv/lib/python3.10/site-packages/_distutils_hack/override.py

@@ -0,0 +1 @@
+__import__('_distutils_hack').do_override()

+ 33 - 0
venv/lib/python3.10/site-packages/_yaml/__init__.py

@@ -0,0 +1,33 @@
+# This is a stub package designed to roughly emulate the _yaml
+# extension module, which previously existed as a standalone module
+# and has been moved into the `yaml` package namespace.
+# It does not perfectly mimic its old counterpart, but should get
+# close enough for anyone who's relying on it even when they shouldn't.
+import yaml
+
+# in some circumstances, the yaml module we imoprted may be from a different version, so we need
+# to tread carefully when poking at it here (it may not have the attributes we expect)
+if not getattr(yaml, '__with_libyaml__', False):
+    from sys import version_info
+
+    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
+    raise exc("No module named '_yaml'")
+else:
+    from yaml._yaml import *
+    import warnings
+    warnings.warn(
+        'The _yaml extension module is now located at yaml._yaml'
+        ' and its location is subject to change.  To use the'
+        ' LibYAML-based parser and emitter, import from `yaml`:'
+        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
+        DeprecationWarning
+    )
+    del warnings
+    # Don't `del yaml` here because yaml is actually an existing
+    # namespace member of _yaml.
+
+__name__ = '_yaml'
+# If the module is top-level (i.e. not a part of any specific package)
+# then the attribute should be set to ''.
+# https://docs.python.org/3.8/library/types.html
+__package__ = ''

BIN
venv/lib/python3.10/site-packages/_yaml/__pycache__/__init__.cpython-310.pyc


+ 1 - 0
venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/INSTALLER

@@ -0,0 +1 @@
+pip

+ 295 - 0
venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/METADATA

@@ -0,0 +1,295 @@
+Metadata-Version: 2.3
+Name: annotated-types
+Version: 0.7.0
+Summary: Reusable constraint types to use with typing.Annotated
+Project-URL: Homepage, https://github.com/annotated-types/annotated-types
+Project-URL: Source, https://github.com/annotated-types/annotated-types
+Project-URL: Changelog, https://github.com/annotated-types/annotated-types/releases
+Author-email: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com>, Samuel Colvin <s@muelcolvin.com>, Zac Hatfield-Dodds <zac@zhd.dev>
+License-File: LICENSE
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Environment :: MacOS X
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: Unix
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Typing :: Typed
+Requires-Python: >=3.8
+Requires-Dist: typing-extensions>=4.0.0; python_version < '3.9'
+Description-Content-Type: text/markdown
+
+# annotated-types
+
+[![CI](https://github.com/annotated-types/annotated-types/workflows/CI/badge.svg?event=push)](https://github.com/annotated-types/annotated-types/actions?query=event%3Apush+branch%3Amain+workflow%3ACI)
+[![pypi](https://img.shields.io/pypi/v/annotated-types.svg)](https://pypi.python.org/pypi/annotated-types)
+[![versions](https://img.shields.io/pypi/pyversions/annotated-types.svg)](https://github.com/annotated-types/annotated-types)
+[![license](https://img.shields.io/github/license/annotated-types/annotated-types.svg)](https://github.com/annotated-types/annotated-types/blob/main/LICENSE)
+
+[PEP-593](https://peps.python.org/pep-0593/) added `typing.Annotated` as a way of
+adding context-specific metadata to existing types, and specifies that
+`Annotated[T, x]` _should_ be treated as `T` by any tool or library without special
+logic for `x`.
+
+This package provides metadata objects which can be used to represent common
+constraints such as upper and lower bounds on scalar values and collection sizes,
+a `Predicate` marker for runtime checks, and
+descriptions of how we intend these metadata to be interpreted. In some cases,
+we also note alternative representations which do not require this package.
+
+## Install
+
+```bash
+pip install annotated-types
+```
+
+## Examples
+
+```python
+from typing import Annotated
+from annotated_types import Gt, Len, Predicate
+
+class MyClass:
+    age: Annotated[int, Gt(18)]                         # Valid: 19, 20, ...
+                                                        # Invalid: 17, 18, "19", 19.0, ...
+    factors: list[Annotated[int, Predicate(is_prime)]]  # Valid: 2, 3, 5, 7, 11, ...
+                                                        # Invalid: 4, 8, -2, 5.0, "prime", ...
+
+    my_list: Annotated[list[int], Len(0, 10)]           # Valid: [], [10, 20, 30, 40, 50]
+                                                        # Invalid: (1, 2), ["abc"], [0] * 20
+```
+
+## Documentation
+
+_While `annotated-types` avoids runtime checks for performance, users should not
+construct invalid combinations such as `MultipleOf("non-numeric")` or `Annotated[int, Len(3)]`.
+Downstream implementors may choose to raise an error, emit a warning, silently ignore
+a metadata item, etc., if the metadata objects described below are used with an
+incompatible type - or for any other reason!_
+
+### Gt, Ge, Lt, Le
+
+Express inclusive and/or exclusive bounds on orderable values - which may be numbers,
+dates, times, strings, sets, etc. Note that the boundary value need not be of the
+same type that was annotated, so long as they can be compared: `Annotated[int, Gt(1.5)]`
+is fine, for example, and implies that the value is an integer x such that `x > 1.5`.
+
+We suggest that implementors may also interpret `functools.partial(operator.le, 1.5)`
+as being equivalent to `Gt(1.5)`, for users who wish to avoid a runtime dependency on
+the `annotated-types` package.
+
+To be explicit, these types have the following meanings:
+
+* `Gt(x)` - value must be "Greater Than" `x` - equivalent to exclusive minimum
+* `Ge(x)` - value must be "Greater than or Equal" to `x` - equivalent to inclusive minimum
+* `Lt(x)` - value must be "Less Than" `x` - equivalent to exclusive maximum
+* `Le(x)` - value must be "Less than or Equal" to `x` - equivalent to inclusive maximum
+
+### Interval
+
+`Interval(gt, ge, lt, le)` allows you to specify an upper and lower bound with a single
+metadata object. `None` attributes should be ignored, and non-`None` attributes
+treated as per the single bounds above.
+
+### MultipleOf
+
+`MultipleOf(multiple_of=x)` might be interpreted in two ways:
+
+1. Python semantics, implying `value % multiple_of == 0`, or
+2. [JSONschema semantics](https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.2.1),
+   where `int(value / multiple_of) == value / multiple_of`.
+
+We encourage users to be aware of these two common interpretations and their
+distinct behaviours, especially since very large or non-integer numbers make
+it easy to cause silent data corruption due to floating-point imprecision.
+
+We encourage libraries to carefully document which interpretation they implement.
+
+### MinLen, MaxLen, Len
+
+`Len()` implies that `min_length <= len(value) <= max_length` - lower and upper bounds are inclusive.
+
+As well as `Len()` which can optionally include upper and lower bounds, we also
+provide `MinLen(x)` and `MaxLen(y)` which are equivalent to `Len(min_length=x)`
+and `Len(max_length=y)` respectively.
+
+`Len`, `MinLen`, and `MaxLen` may be used with any type which supports `len(value)`.
+
+Examples of usage:
+
+* `Annotated[list, MaxLen(10)]` (or `Annotated[list, Len(max_length=10))`) - list must have a length of 10 or less
+* `Annotated[str, MaxLen(10)]` - string must have a length of 10 or less
+* `Annotated[list, MinLen(3))` (or `Annotated[list, Len(min_length=3))`) - list must have a length of 3 or more
+* `Annotated[list, Len(4, 6)]` - list must have a length of 4, 5, or 6
+* `Annotated[list, Len(8, 8)]` - list must have a length of exactly 8
+
+#### Changed in v0.4.0
+
+* `min_inclusive` has been renamed to `min_length`, no change in meaning
+* `max_exclusive` has been renamed to `max_length`, upper bound is now **inclusive** instead of **exclusive**
+* The recommendation that slices are interpreted as `Len` has been removed due to ambiguity and different semantic
+  meaning of the upper bound in slices vs. `Len`
+
+See [issue #23](https://github.com/annotated-types/annotated-types/issues/23) for discussion.
+
+### Timezone
+
+`Timezone` can be used with a `datetime` or a `time` to express which timezones
+are allowed. `Annotated[datetime, Timezone(None)]` must be a naive datetime.
+`Timezone[...]` ([literal ellipsis](https://docs.python.org/3/library/constants.html#Ellipsis))
+expresses that any timezone-aware datetime is allowed. You may also pass a specific
+timezone string or [`tzinfo`](https://docs.python.org/3/library/datetime.html#tzinfo-objects)
+object such as `Timezone(timezone.utc)` or `Timezone("Africa/Abidjan")` to express that you only
+allow a specific timezone, though we note that this is often a symptom of fragile design.
+
+#### Changed in v0.x.x
+
+* `Timezone` accepts [`tzinfo`](https://docs.python.org/3/library/datetime.html#tzinfo-objects) objects instead of
+  `timezone`, extending compatibility to [`zoneinfo`](https://docs.python.org/3/library/zoneinfo.html) and third party libraries.
+
+### Unit
+
+`Unit(unit: str)` expresses that the annotated numeric value is the magnitude of
+a quantity with the specified unit. For example, `Annotated[float, Unit("m/s")]`
+would be a float representing a velocity in meters per second.
+
+Please note that `annotated_types` itself makes no attempt to parse or validate
+the unit string in any way. That is left entirely to downstream libraries,
+such as [`pint`](https://pint.readthedocs.io) or
+[`astropy.units`](https://docs.astropy.org/en/stable/units/).
+
+An example of how a library might use this metadata:
+
+```python
+from annotated_types import Unit
+from typing import Annotated, TypeVar, Callable, Any, get_origin, get_args
+
+# given a type annotated with a unit:
+Meters = Annotated[float, Unit("m")]
+
+
+# you can cast the annotation to a specific unit type with any
+# callable that accepts a string and returns the desired type
+T = TypeVar("T")
+def cast_unit(tp: Any, unit_cls: Callable[[str], T]) -> T | None:
+    if get_origin(tp) is Annotated:
+        for arg in get_args(tp):
+            if isinstance(arg, Unit):
+                return unit_cls(arg.unit)
+    return None
+
+
+# using `pint`
+import pint
+pint_unit = cast_unit(Meters, pint.Unit)
+
+
+# using `astropy.units`
+import astropy.units as u
+astropy_unit = cast_unit(Meters, u.Unit)
+```
+
+### Predicate
+
+`Predicate(func: Callable)` expresses that `func(value)` is truthy for valid values.
+Users should prefer the statically inspectable metadata above, but if you need
+the full power and flexibility of arbitrary runtime predicates... here it is.
+
+For some common constraints, we provide generic types:
+
+* `IsLower       = Annotated[T, Predicate(str.islower)]`
+* `IsUpper       = Annotated[T, Predicate(str.isupper)]`
+* `IsDigit       = Annotated[T, Predicate(str.isdigit)]`
+* `IsFinite      = Annotated[T, Predicate(math.isfinite)]`
+* `IsNotFinite   = Annotated[T, Predicate(Not(math.isfinite))]`
+* `IsNan         = Annotated[T, Predicate(math.isnan)]`
+* `IsNotNan      = Annotated[T, Predicate(Not(math.isnan))]`
+* `IsInfinite    = Annotated[T, Predicate(math.isinf)]`
+* `IsNotInfinite = Annotated[T, Predicate(Not(math.isinf))]`
+
+so that you can write e.g. `x: IsFinite[float] = 2.0` instead of the longer
+(but exactly equivalent) `x: Annotated[float, Predicate(math.isfinite)] = 2.0`.
+
+Some libraries might have special logic to handle known or understandable predicates,
+for example by checking for `str.isdigit` and using its presence to both call custom
+logic to enforce digit-only strings, and customise some generated external schema.
+Users are therefore encouraged to avoid indirection like `lambda s: s.lower()`, in
+favor of introspectable methods such as `str.lower` or `re.compile("pattern").search`.
+
+To enable basic negation of commonly used predicates like `math.isnan` without introducing introspection that makes it impossible for implementers to introspect the predicate we provide a `Not` wrapper that simply negates the predicate in an introspectable manner. Several of the predicates listed above are created in this manner.
+
+We do not specify what behaviour should be expected for predicates that raise
+an exception.  For example `Annotated[int, Predicate(str.isdigit)]` might silently
+skip invalid constraints, or statically raise an error; or it might try calling it
+and then propagate or discard the resulting
+`TypeError: descriptor 'isdigit' for 'str' objects doesn't apply to a 'int' object`
+exception.  We encourage libraries to document the behaviour they choose.
+
+### Doc
+
+`doc()` can be used to add documentation information in `Annotated`, for function and method parameters, variables, class attributes, return types, and any place where `Annotated` can be used.
+
+It expects a value that can be statically analyzed, as the main use case is for static analysis, editors, documentation generators, and similar tools.
+
+It returns a `DocInfo` class with a single attribute `documentation` containing the value passed to `doc()`.
+
+This is the early adopter's alternative form of the [`typing-doc` proposal](https://github.com/tiangolo/fastapi/blob/typing-doc/typing_doc.md).
+
+### Integrating downstream types with `GroupedMetadata`
+
+Implementers may choose to provide a convenience wrapper that groups multiple pieces of metadata.
+This can help reduce verbosity and cognitive overhead for users.
+For example, an implementer like Pydantic might provide a `Field` or `Meta` type that accepts keyword arguments and transforms these into low-level metadata:
+
+```python
+from dataclasses import dataclass
+from typing import Iterator
+from annotated_types import GroupedMetadata, Ge
+
+@dataclass
+class Field(GroupedMetadata):
+    ge: int | None = None
+    description: str | None = None
+
+    def __iter__(self) -> Iterator[object]:
+        # Iterating over a GroupedMetadata object should yield annotated-types
+        # constraint metadata objects which describe it as fully as possible,
+        # and may include other unknown objects too.
+        if self.ge is not None:
+            yield Ge(self.ge)
+        if self.description is not None:
+            yield Description(self.description)
+```
+
+Libraries consuming annotated-types constraints should check for `GroupedMetadata` and unpack it by iterating over the object and treating the results as if they had been "unpacked" in the `Annotated` type.  The same logic should be applied to the [PEP 646 `Unpack` type](https://peps.python.org/pep-0646/), so that `Annotated[T, Field(...)]`, `Annotated[T, Unpack[Field(...)]]` and `Annotated[T, *Field(...)]` are all treated consistently.
+
+Libraries consuming annotated-types should also ignore any metadata they do not recongize that came from unpacking a `GroupedMetadata`, just like they ignore unrecognized metadata in `Annotated` itself.
+
+Our own `annotated_types.Interval` class is a `GroupedMetadata` which unpacks itself into `Gt`, `Lt`, etc., so this is not an abstract concern.  Similarly, `annotated_types.Len` is a `GroupedMetadata` which unpacks itself into `MinLen` (optionally) and `MaxLen`.
+
+### Consuming metadata
+
+We intend to not be prescriptive as to _how_ the metadata and constraints are used, but as an example of how one might parse constraints from types annotations see our [implementation in `test_main.py`](https://github.com/annotated-types/annotated-types/blob/f59cf6d1b5255a0fe359b93896759a180bec30ae/tests/test_main.py#L94-L103).
+
+It is up to the implementer to determine how this metadata is used.
+You could use the metadata for runtime type checking, for generating schemas or to generate example data, amongst other use cases.
+
+## Design & History
+
+This package was designed at the PyCon 2022 sprints by the maintainers of Pydantic
+and Hypothesis, with the goal of making it as easy as possible for end-users to
+provide more informative annotations for use by runtime libraries.
+
+It is deliberately minimal, and following PEP-593 allows considerable downstream
+discretion in what (if anything!) they choose to support. Nonetheless, we expect
+that staying simple and covering _only_ the most common use-cases will give users
+and maintainers the best experience we can. If you'd like more constraints for your
+types - follow our lead, by defining them and documenting them downstream!

+ 10 - 0
venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/RECORD

@@ -0,0 +1,10 @@
+annotated_types-0.7.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+annotated_types-0.7.0.dist-info/METADATA,sha256=7ltqxksJJ0wCYFGBNIQCWTlWQGeAH0hRFdnK3CB895E,15046
+annotated_types-0.7.0.dist-info/RECORD,,
+annotated_types-0.7.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+annotated_types-0.7.0.dist-info/licenses/LICENSE,sha256=_hBJiEsaDZNCkB6I4H8ykl0ksxIdmXK2poBfuYJLCV0,1083
+annotated_types/__init__.py,sha256=RynLsRKUEGI0KimXydlD1fZEfEzWwDo0Uon3zOKhG1Q,13819
+annotated_types/__pycache__/__init__.cpython-310.pyc,,
+annotated_types/__pycache__/test_cases.cpython-310.pyc,,
+annotated_types/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+annotated_types/test_cases.py,sha256=zHFX6EpcMbGJ8FzBYDbO56bPwx_DYIVSKbZM-4B3_lg,6421

+ 4 - 0
venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/WHEEL

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.24.2
+Root-Is-Purelib: true
+Tag: py3-none-any

+ 21 - 0
venv/lib/python3.10/site-packages/annotated_types-0.7.0.dist-info/licenses/LICENSE

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2022 the contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 432 - 0
venv/lib/python3.10/site-packages/annotated_types/__init__.py

@@ -0,0 +1,432 @@
+import math
+import sys
+import types
+from dataclasses import dataclass
+from datetime import tzinfo
+from typing import TYPE_CHECKING, Any, Callable, Iterator, Optional, SupportsFloat, SupportsIndex, TypeVar, Union
+
+if sys.version_info < (3, 8):
+    from typing_extensions import Protocol, runtime_checkable
+else:
+    from typing import Protocol, runtime_checkable
+
+if sys.version_info < (3, 9):
+    from typing_extensions import Annotated, Literal
+else:
+    from typing import Annotated, Literal
+
+if sys.version_info < (3, 10):
+    EllipsisType = type(Ellipsis)
+    KW_ONLY = {}
+    SLOTS = {}
+else:
+    from types import EllipsisType
+
+    KW_ONLY = {"kw_only": True}
+    SLOTS = {"slots": True}
+
+
+__all__ = (
+    'BaseMetadata',
+    'GroupedMetadata',
+    'Gt',
+    'Ge',
+    'Lt',
+    'Le',
+    'Interval',
+    'MultipleOf',
+    'MinLen',
+    'MaxLen',
+    'Len',
+    'Timezone',
+    'Predicate',
+    'LowerCase',
+    'UpperCase',
+    'IsDigits',
+    'IsFinite',
+    'IsNotFinite',
+    'IsNan',
+    'IsNotNan',
+    'IsInfinite',
+    'IsNotInfinite',
+    'doc',
+    'DocInfo',
+    '__version__',
+)
+
+__version__ = '0.7.0'
+
+
+T = TypeVar('T')
+
+
+# arguments that start with __ are considered
+# positional only
+# see https://peps.python.org/pep-0484/#positional-only-arguments
+
+
+class SupportsGt(Protocol):
+    def __gt__(self: T, __other: T) -> bool:
+        ...
+
+
+class SupportsGe(Protocol):
+    def __ge__(self: T, __other: T) -> bool:
+        ...
+
+
+class SupportsLt(Protocol):
+    def __lt__(self: T, __other: T) -> bool:
+        ...
+
+
+class SupportsLe(Protocol):
+    def __le__(self: T, __other: T) -> bool:
+        ...
+
+
+class SupportsMod(Protocol):
+    def __mod__(self: T, __other: T) -> T:
+        ...
+
+
+class SupportsDiv(Protocol):
+    def __div__(self: T, __other: T) -> T:
+        ...
+
+
+class BaseMetadata:
+    """Base class for all metadata.
+
+    This exists mainly so that implementers
+    can do `isinstance(..., BaseMetadata)` while traversing field annotations.
+    """
+
+    __slots__ = ()
+
+
+@dataclass(frozen=True, **SLOTS)
+class Gt(BaseMetadata):
+    """Gt(gt=x) implies that the value must be greater than x.
+
+    It can be used with any type that supports the ``>`` operator,
+    including numbers, dates and times, strings, sets, and so on.
+    """
+
+    gt: SupportsGt
+
+
+@dataclass(frozen=True, **SLOTS)
+class Ge(BaseMetadata):
+    """Ge(ge=x) implies that the value must be greater than or equal to x.
+
+    It can be used with any type that supports the ``>=`` operator,
+    including numbers, dates and times, strings, sets, and so on.
+    """
+
+    ge: SupportsGe
+
+
+@dataclass(frozen=True, **SLOTS)
+class Lt(BaseMetadata):
+    """Lt(lt=x) implies that the value must be less than x.
+
+    It can be used with any type that supports the ``<`` operator,
+    including numbers, dates and times, strings, sets, and so on.
+    """
+
+    lt: SupportsLt
+
+
+@dataclass(frozen=True, **SLOTS)
+class Le(BaseMetadata):
+    """Le(le=x) implies that the value must be less than or equal to x.
+
+    It can be used with any type that supports the ``<=`` operator,
+    including numbers, dates and times, strings, sets, and so on.
+    """
+
+    le: SupportsLe
+
+
+@runtime_checkable
+class GroupedMetadata(Protocol):
+    """A grouping of multiple objects, like typing.Unpack.
+
+    `GroupedMetadata` on its own is not metadata and has no meaning.
+    All of the constraints and metadata should be fully expressable
+    in terms of the `BaseMetadata`'s returned by `GroupedMetadata.__iter__()`.
+
+    Concrete implementations should override `GroupedMetadata.__iter__()`
+    to add their own metadata.
+    For example:
+
+    >>> @dataclass
+    >>> class Field(GroupedMetadata):
+    >>>     gt: float | None = None
+    >>>     description: str | None = None
+    ...
+    >>>     def __iter__(self) -> Iterable[object]:
+    >>>         if self.gt is not None:
+    >>>             yield Gt(self.gt)
+    >>>         if self.description is not None:
+    >>>             yield Description(self.gt)
+
+    Also see the implementation of `Interval` below for an example.
+
+    Parsers should recognize this and unpack it so that it can be used
+    both with and without unpacking:
+
+    - `Annotated[int, Field(...)]` (parser must unpack Field)
+    - `Annotated[int, *Field(...)]` (PEP-646)
+    """  # noqa: trailing-whitespace
+
+    @property
+    def __is_annotated_types_grouped_metadata__(self) -> Literal[True]:
+        return True
+
+    def __iter__(self) -> Iterator[object]:
+        ...
+
+    if not TYPE_CHECKING:
+        __slots__ = ()  # allow subclasses to use slots
+
+        def __init_subclass__(cls, *args: Any, **kwargs: Any) -> None:
+            # Basic ABC like functionality without the complexity of an ABC
+            super().__init_subclass__(*args, **kwargs)
+            if cls.__iter__ is GroupedMetadata.__iter__:
+                raise TypeError("Can't subclass GroupedMetadata without implementing __iter__")
+
+        def __iter__(self) -> Iterator[object]:  # noqa: F811
+            raise NotImplementedError  # more helpful than "None has no attribute..." type errors
+
+
+@dataclass(frozen=True, **KW_ONLY, **SLOTS)
+class Interval(GroupedMetadata):
+    """Interval can express inclusive or exclusive bounds with a single object.
+
+    It accepts keyword arguments ``gt``, ``ge``, ``lt``, and/or ``le``, which
+    are interpreted the same way as the single-bound constraints.
+    """
+
+    gt: Union[SupportsGt, None] = None
+    ge: Union[SupportsGe, None] = None
+    lt: Union[SupportsLt, None] = None
+    le: Union[SupportsLe, None] = None
+
+    def __iter__(self) -> Iterator[BaseMetadata]:
+        """Unpack an Interval into zero or more single-bounds."""
+        if self.gt is not None:
+            yield Gt(self.gt)
+        if self.ge is not None:
+            yield Ge(self.ge)
+        if self.lt is not None:
+            yield Lt(self.lt)
+        if self.le is not None:
+            yield Le(self.le)
+
+
+@dataclass(frozen=True, **SLOTS)
+class MultipleOf(BaseMetadata):
+    """MultipleOf(multiple_of=x) might be interpreted in two ways:
+
+    1. Python semantics, implying ``value % multiple_of == 0``, or
+    2. JSONschema semantics, where ``int(value / multiple_of) == value / multiple_of``
+
+    We encourage users to be aware of these two common interpretations,
+    and libraries to carefully document which they implement.
+    """
+
+    multiple_of: Union[SupportsDiv, SupportsMod]
+
+
+@dataclass(frozen=True, **SLOTS)
+class MinLen(BaseMetadata):
+    """
+    MinLen() implies minimum inclusive length,
+    e.g. ``len(value) >= min_length``.
+    """
+
+    min_length: Annotated[int, Ge(0)]
+
+
+@dataclass(frozen=True, **SLOTS)
+class MaxLen(BaseMetadata):
+    """
+    MaxLen() implies maximum inclusive length,
+    e.g. ``len(value) <= max_length``.
+    """
+
+    max_length: Annotated[int, Ge(0)]
+
+
+@dataclass(frozen=True, **SLOTS)
+class Len(GroupedMetadata):
+    """
+    Len() implies that ``min_length <= len(value) <= max_length``.
+
+    Upper bound may be omitted or ``None`` to indicate no upper length bound.
+    """
+
+    min_length: Annotated[int, Ge(0)] = 0
+    max_length: Optional[Annotated[int, Ge(0)]] = None
+
+    def __iter__(self) -> Iterator[BaseMetadata]:
+        """Unpack a Len into zone or more single-bounds."""
+        if self.min_length > 0:
+            yield MinLen(self.min_length)
+        if self.max_length is not None:
+            yield MaxLen(self.max_length)
+
+
+@dataclass(frozen=True, **SLOTS)
+class Timezone(BaseMetadata):
+    """Timezone(tz=...) requires a datetime to be aware (or ``tz=None``, naive).
+
+    ``Annotated[datetime, Timezone(None)]`` must be a naive datetime.
+    ``Timezone[...]`` (the ellipsis literal) expresses that the datetime must be
+    tz-aware but any timezone is allowed.
+
+    You may also pass a specific timezone string or tzinfo object such as
+    ``Timezone(timezone.utc)`` or ``Timezone("Africa/Abidjan")`` to express that
+    you only allow a specific timezone, though we note that this is often
+    a symptom of poor design.
+    """
+
+    tz: Union[str, tzinfo, EllipsisType, None]
+
+
+@dataclass(frozen=True, **SLOTS)
+class Unit(BaseMetadata):
+    """Indicates that the value is a physical quantity with the specified unit.
+
+    It is intended for usage with numeric types, where the value represents the
+    magnitude of the quantity. For example, ``distance: Annotated[float, Unit('m')]``
+    or ``speed: Annotated[float, Unit('m/s')]``.
+
+    Interpretation of the unit string is left to the discretion of the consumer.
+    It is suggested to follow conventions established by python libraries that work
+    with physical quantities, such as
+
+    - ``pint`` : <https://pint.readthedocs.io/en/stable/>
+    - ``astropy.units``: <https://docs.astropy.org/en/stable/units/>
+
+    For indicating a quantity with a certain dimensionality but without a specific unit
+    it is recommended to use square brackets, e.g. `Annotated[float, Unit('[time]')]`.
+    Note, however, ``annotated_types`` itself makes no use of the unit string.
+    """
+
+    unit: str
+
+
+@dataclass(frozen=True, **SLOTS)
+class Predicate(BaseMetadata):
+    """``Predicate(func: Callable)`` implies `func(value)` is truthy for valid values.
+
+    Users should prefer statically inspectable metadata, but if you need the full
+    power and flexibility of arbitrary runtime predicates... here it is.
+
+    We provide a few predefined predicates for common string constraints:
+    ``IsLower = Predicate(str.islower)``, ``IsUpper = Predicate(str.isupper)``, and
+    ``IsDigits = Predicate(str.isdigit)``. Users are encouraged to use methods which
+    can be given special handling, and avoid indirection like ``lambda s: s.lower()``.
+
+    Some libraries might have special logic to handle certain predicates, e.g. by
+    checking for `str.isdigit` and using its presence to both call custom logic to
+    enforce digit-only strings, and customise some generated external schema.
+
+    We do not specify what behaviour should be expected for predicates that raise
+    an exception.  For example `Annotated[int, Predicate(str.isdigit)]` might silently
+    skip invalid constraints, or statically raise an error; or it might try calling it
+    and then propagate or discard the resulting exception.
+    """
+
+    func: Callable[[Any], bool]
+
+    def __repr__(self) -> str:
+        if getattr(self.func, "__name__", "<lambda>") == "<lambda>":
+            return f"{self.__class__.__name__}({self.func!r})"
+        if isinstance(self.func, (types.MethodType, types.BuiltinMethodType)) and (
+            namespace := getattr(self.func.__self__, "__name__", None)
+        ):
+            return f"{self.__class__.__name__}({namespace}.{self.func.__name__})"
+        if isinstance(self.func, type(str.isascii)):  # method descriptor
+            return f"{self.__class__.__name__}({self.func.__qualname__})"
+        return f"{self.__class__.__name__}({self.func.__name__})"
+
+
+@dataclass
+class Not:
+    func: Callable[[Any], bool]
+
+    def __call__(self, __v: Any) -> bool:
+        return not self.func(__v)
+
+
+_StrType = TypeVar("_StrType", bound=str)
+
+LowerCase = Annotated[_StrType, Predicate(str.islower)]
+"""
+Return True if the string is a lowercase string, False otherwise.
+
+A string is lowercase if all cased characters in the string are lowercase and there is at least one cased character in the string.
+"""  # noqa: E501
+UpperCase = Annotated[_StrType, Predicate(str.isupper)]
+"""
+Return True if the string is an uppercase string, False otherwise.
+
+A string is uppercase if all cased characters in the string are uppercase and there is at least one cased character in the string.
+"""  # noqa: E501
+IsDigit = Annotated[_StrType, Predicate(str.isdigit)]
+IsDigits = IsDigit  # type: ignore  # plural for backwards compatibility, see #63
+"""
+Return True if the string is a digit string, False otherwise.
+
+A string is a digit string if all characters in the string are digits and there is at least one character in the string.
+"""  # noqa: E501
+IsAscii = Annotated[_StrType, Predicate(str.isascii)]
+"""
+Return True if all characters in the string are ASCII, False otherwise.
+
+ASCII characters have code points in the range U+0000-U+007F. Empty string is ASCII too.
+"""
+
+_NumericType = TypeVar('_NumericType', bound=Union[SupportsFloat, SupportsIndex])
+IsFinite = Annotated[_NumericType, Predicate(math.isfinite)]
+"""Return True if x is neither an infinity nor a NaN, and False otherwise."""
+IsNotFinite = Annotated[_NumericType, Predicate(Not(math.isfinite))]
+"""Return True if x is one of infinity or NaN, and False otherwise"""
+IsNan = Annotated[_NumericType, Predicate(math.isnan)]
+"""Return True if x is a NaN (not a number), and False otherwise."""
+IsNotNan = Annotated[_NumericType, Predicate(Not(math.isnan))]
+"""Return True if x is anything but NaN (not a number), and False otherwise."""
+IsInfinite = Annotated[_NumericType, Predicate(math.isinf)]
+"""Return True if x is a positive or negative infinity, and False otherwise."""
+IsNotInfinite = Annotated[_NumericType, Predicate(Not(math.isinf))]
+"""Return True if x is neither a positive or negative infinity, and False otherwise."""
+
+try:
+    from typing_extensions import DocInfo, doc  # type: ignore [attr-defined]
+except ImportError:
+
+    @dataclass(frozen=True, **SLOTS)
+    class DocInfo:  # type: ignore [no-redef]
+        """ "
+        The return value of doc(), mainly to be used by tools that want to extract the
+        Annotated documentation at runtime.
+        """
+
+        documentation: str
+        """The documentation string passed to doc()."""
+
+    def doc(
+        documentation: str,
+    ) -> DocInfo:
+        """
+        Add documentation to a type annotation inside of Annotated.
+
+        For example:
+
+        >>> def hi(name: Annotated[int, doc("The name of the user")]) -> None: ...
+        """
+        return DocInfo(documentation)

BIN
venv/lib/python3.10/site-packages/annotated_types/__pycache__/__init__.cpython-310.pyc


BIN
venv/lib/python3.10/site-packages/annotated_types/__pycache__/test_cases.cpython-310.pyc


Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff