#!/bin/bash
# test_complete.sh
# End-to-end functional test script for schedule-embedding-api.
# Test scenario: based on the material in 解析结果示例.md
# Usage: make sure the application is running at http://localhost:8084,
#        or export BASE_URL (and optionally RESULTS_FILE) to override the
#        defaults below.

# Target service and report file; both fall back to the historical defaults
# when the environment does not provide a value.
BASE_URL="${BASE_URL:-http://localhost:8084}"
RESULTS_FILE="${RESULTS_FILE:-complete_test_results.txt}"

# Running counters, updated by every check in the script.
PASSED=0
FAILED=0

# Colour escape sequences (expanded later by echo -e / printf %b).
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Start from an empty results file.
: > "$RESULTS_FILE"

# Report header, written to both the terminal and the results file.
{
  echo "========================================"
  echo " 完整功能测试 - 解析结果示例数据"
  echo " 测试时间: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "========================================"
  echo ""
} | tee -a "$RESULTS_FILE"
  22. # 测试函数
  23. test_api() {
  24. local test_name="$1"
  25. local method="$2"
  26. local url="$3"
  27. local data="$4"
  28. local expected_code="$5"
  29. echo -n "[$test_name] ... " | tee -a "$RESULTS_FILE"
  30. if [ "$method" = "GET" ]; then
  31. response=$(curl -s -w "\n%{http_code}" -X GET "$BASE_URL$url")
  32. elif [ "$method" = "DELETE" ]; then
  33. response=$(curl -s -w "\n%{http_code}" -X DELETE "$BASE_URL$url")
  34. elif [ "$method" = "POST" ]; then
  35. if [ -z "$data" ]; then
  36. response=$(curl -s -w "\n%{http_code}" -X POST "$BASE_URL$url" \
  37. -H "Content-Type: application/json")
  38. else
  39. response=$(curl -s -w "\n%{http_code}" -X POST "$BASE_URL$url" \
  40. -H "Content-Type: application/json" \
  41. -d "$data")
  42. fi
  43. fi
  44. http_code=$(echo "$response" | tail -n1)
  45. body=$(echo "$response" | sed '$d')
  46. if [ "$http_code" = "$expected_code" ]; then
  47. echo -e "${GREEN}PASSED${NC}" | tee -a "$RESULTS_FILE"
  48. ((PASSED++))
  49. else
  50. echo -e "${RED}FAILED${NC} (HTTP $http_code)" | tee -a "$RESULTS_FILE"
  51. ((FAILED++))
  52. fi
  53. # 显示关键结果
  54. if [[ "$body" == *"success"* ]] || [[ "$body" == *"chunkCount"* ]] || [[ "$body" == *"score"* ]]; then
  55. echo " -> $body" | tee -a "$RESULTS_FILE"
  56. fi
  57. echo "" | tee -a "$RESULTS_FILE"
  58. }
  59. # ========================================================================
  60. # 1. 健康检查
  61. # ========================================================================
  62. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  63. echo "1. 健康检查" | tee -a "$RESULTS_FILE"
  64. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  65. test_api "健康检查" "GET" "/actuator/health" "" "200"
  66. # ========================================================================
  67. # 2. 单个文档入库 - 银行流水示例
  68. # ========================================================================
  69. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  70. echo "2. 单个文档入库 - 银行流水" | tee -a "$RESULTS_FILE"
  71. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  72. test_api "入库-银行流水" \
  73. "POST" \
  74. "/api/v1/documents/index" \
  75. '{
  76. "docId": "bank-flow-001",
  77. "fileName": "111111133.png",
  78. "fullText": "账户明细\n\n账号:09-75******6775 户名:上海***有限公司 币种:人民币\n\n中国农业银行\n\n明细回单\n\n专用章\n\n日期:2023年01月05日-2023年04月16日\n\n交易时间 | 金额 | 余额 | 交易用途 | 交易对方\n2023/01/05 09:15:00 | 320 | 320 | 水稻销售收入(优质粳稻) | 金穗粮食贸易公司\n2023/01/08 10:45:00 | -150 | 170 | 化肥采购(复合肥) | 丰收农资连锁\n2023/01/12 14:20:00 | 280 | 450 | 蔬菜销售款(西红柿/黄瓜) | 绿源农产品公司\n2023/01/15 11:30:00 | -120 | 330 | 农机维修保养 | 北方农机服务\n2023/01/18 15:50:00 | 180 | 510 | 生猪出栏销售收入 | 双汇食品集团\n2023/01/22 09:25:00 | -80 | 430 | 种子采购(玉米种子) | 中种农业科技\n2023/01/25 13:10:00 | 250 | 680 | 中药材销售(黄芪) | 同仁堂药业\n2023/01/28 16:35:00 | -60 | 620 | 饲料采购 | 正大饲料公司\n2023/02/01 08:40:00 | 150 | 770 | 农资返利结算 | 丰收农资连锁\n2023/02/05 10:15:00 | -90 | 680 | 土地流转费支付 | 村委会\n2023/02/08 14:00:00 | 220 | 900 | 农产品冷链运输服务费 | 顺丰冷链物流\n2023/02/12 11:20:00 | -75 | 825 | 员工薪酬发放 | 员工个人账户\n2023/02/15 15:30:00 | 190 | 1015 | 蔬菜零收款 | 社区生鲜店\n2023/02/18 09:45:00 | -110 | 905 | 农业保险续保 | 太平洋保险\n2023/02/22 13:25:00 | 270 | 1175 | 种粮补贴收入 | 市农业农村局",
  79. "filePath": "/data/files/20260313_8f9e7d6c5b4a.png",
  80. "fileSize": 102400,
  81. "fileType": "png",
  82. "metadata": {
  83. "file_unique_id": "20260313_8f9e7d6c5b4a",
  84. "business_topic": "金融-信贷审批",
  85. "document_type": "银行流水",
  86. "belong_department": "风控部",
  87. "tags": ["信贷", "流水", "合规", "农业"]
  88. }
  89. }' \
  90. "200"
  91. # ========================================================================
  92. # 3. 单个文档入库 - 操作手册
  93. # ========================================================================
  94. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  95. echo "3. 单个文档入库 - 操作手册" | tee -a "$RESULTS_FILE"
  96. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  97. test_api "入库-操作手册" \
  98. "POST" \
  99. "/api/v1/documents/index" \
  100. '{
  101. "docId": "manual-001",
  102. "fileName": "宇信科技管理数智化星云平台-操作手册.docx",
  103. "fullText": "宇信科技管理数智化星云平台 快速入门操作手册\n\n北京宇信科技集团股份有限公司\n\n产品概述\n本平台是针对宇信科技管理数智化星云平台的快速入门手册,涵盖从系统初始操作,建立账号信息,建立个人工作台到进行数据分析的完整流程。\n\n系统初始配置\n系统管理员需对平台进行初始设置,包括:\n1. 公共参数维护:逻辑系统管理、控制点管理、系统参数、数据字典配置\n2. 用户建立及权限划分:机构管理、部门管理、岗位管理、角色管理、用户管理\n3. 应用维护及授权\n4. 业务流程定义\n\n用户权限管理\n系统支持基于角色的权限控制,不同角色拥有不同的功能权限和数据权限。角色类型包括:系统管理人员、业务人员、业务管理人员。\n\n数据准备与指标管理\n业务人员可通过数据源配置、数据集管理、维度管理等功能准备数据,创建根指标、组合指标、派生指标,并进行指标测试和上线管理。\n\n经营分析看板\n支持通过指标配置经营分析看板,包括普通模式和专业模式两种类型,可进行看板配置、公开、授权和查看。",
  104. "filePath": "/data/files/20260313_8f9e7d6c5b4a123123.docx",
  105. "fileSize": 2048000,
  106. "fileType": "docx",
  107. "metadata": {
  108. "file_unique_id": "20260313_8f9e7d6c5b4a1123",
  109. "business_topic": "金融-系统文件",
  110. "document_type": "操作手册",
  111. "belong_department": "产品部",
  112. "tags": ["金融", "企管", "星云", "操作手册"]
  113. }
  114. }' \
  115. "200"
  116. # ========================================================================
  117. # 4. 批量文档入库 - 多类型文档
  118. # ========================================================================
  119. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  120. echo "4. 批量文档入库 - 多类型文档" | tee -a "$RESULTS_FILE"
  121. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  122. test_api "批量入库-3个文档" \
  123. "POST" \
  124. "/api/v1/documents/batch-index" \
  125. '{
  126. "items": [
  127. {
  128. "docId": "contract-loan-test",
  129. "fileName": "贷款合同.pdf",
  130. "fullText": "贷款合同范本,主要条款包括贷款金额、利率、还款方式、担保条款等。甲方为贷款人,乙方为借款人。合同约定贷款金额为人民币500万元,期限36个月,按等额本息方式还款。",
  131. "filePath": "/data/contracts/loan001.pdf",
  132. "fileSize": 512000,
  133. "fileType": "pdf",
  134. "metadata": {
  135. "business_topic": "金融-信贷审批",
  136. "document_type": "贷款合同",
  137. "belong_department": "信贷部",
  138. "tags": ["贷款", "合同", "信贷"]
  139. }
  140. },
  141. {
  142. "docId": "audit-report-001",
  143. "fileName": "审计报告2025.pdf",
  144. "fullText": "年度审计报告,对公司2024年度财务状况进行审计。审计范围包括资产负债表、利润表、现金流量表等财务报表,以及内部控制制度的有效性。审计意见为标准无保留意见。",
  145. "filePath": "/data/reports/audit2025.pdf",
  146. "fileSize": 1024000,
  147. "fileType": "pdf",
  148. "metadata": {
  149. "business_topic": "金融-审计",
  150. "document_type": "审计报告",
  151. "belong_department": "审计部",
  152. "tags": ["审计", "财务", "合规"]
  153. }
  154. },
  155. {
  156. "docId": "risk-assessment-001",
  157. "fileName": "风险评估报告.pdf",
  158. "fullText": "企业风险评估报告,评估了公司的信用风险、市场风险、操作风险和合规风险。根据评估结果,建议加强贷前调查和贷后管理,完善内部控制制度。",
  159. "filePath": "/data/reports/risk001.pdf",
  160. "fileSize": 768000,
  161. "fileType": "pdf",
  162. "metadata": {
  163. "business_topic": "金融-风险管理",
  164. "document_type": "风险评估报告",
  165. "belong_department": "风控部",
  166. "tags": ["风险", "评估", "合规"]
  167. }
  168. }
  169. ]
  170. }' \
  171. "200"
  172. # ========================================================================
  173. # 5. 等待索引
  174. # ========================================================================
  175. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  176. echo "5. 等待文档索引完成..." | tee -a "$RESULTS_FILE"
  177. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  178. sleep 3
  179. # ========================================================================
  180. # 6. 向量搜索测试 - 按语义搜索
  181. # ========================================================================
  182. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  183. echo "6. 向量搜索 - 语义搜索测试" | tee -a "$RESULTS_FILE"
  184. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  185. # 6.1 搜索银行流水相关内容
  186. test_api "向量搜索-银行流水" \
  187. "POST" \
  188. "/api/v1/search" \
  189. '{
  190. "query": "农业银行账户交易明细流水",
  191. "topK": 3
  192. }' \
  193. "200"
  194. # 6.2 搜索操作手册相关内容
  195. test_api "向量搜索-系统操作" \
  196. "POST" \
  197. "/api/v1/search" \
  198. '{
  199. "query": "如何设置用户权限和角色管理",
  200. "topK": 3
  201. }' \
  202. "200"
  203. # 6.3 搜索贷款合同相关内容
  204. test_api "向量搜索-贷款合同" \
  205. "POST" \
  206. "/api/v1/search" \
  207. '{
  208. "query": "贷款合同还款方式和担保条款",
  209. "topK": 3
  210. }' \
  211. "200"
  212. # 6.4 搜索审计相关内容
  213. test_api "向量搜索-审计报告" \
  214. "POST" \
  215. "/api/v1/search" \
  216. '{
  217. "query": "财务审计年度报表",
  218. "topK": 3
  219. }' \
  220. "200"
  221. # 6.5 搜索风控相关内容
  222. test_api "向量搜索-风险评估" \
  223. "POST" \
  224. "/api/v1/search" \
  225. '{
  226. "query": "企业信用风险评估",
  227. "topK": 3
  228. }' \
  229. "200"
  230. # ========================================================================
  231. # 7. 混合搜索测试 - 按业务维度过滤
  232. # ========================================================================
  233. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  234. echo "7. 混合搜索 - 业务维度过滤" | tee -a "$RESULTS_FILE"
  235. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  236. # 7.1 按业务主题过滤 - 信贷审批
  237. test_api "混合搜索-信贷审批主题" \
  238. "POST" \
  239. "/api/v1/search/hybrid" \
  240. '{
  241. "query": "合同条款",
  242. "topK": 5,
  243. "filters": {
  244. "business_topic": "金融-信贷审批"
  245. }
  246. }' \
  247. "200"
  248. # 7.2 按文档类型过滤 - 贷款合同
  249. test_api "混合搜索-贷款合同类型" \
  250. "POST" \
  251. "/api/v1/search/hybrid" \
  252. '{
  253. "query": "贷款",
  254. "topK": 5,
  255. "filters": {
  256. "document_type": "贷款合同"
  257. }
  258. }' \
  259. "200"
  260. # 7.3 按部门过滤 - 风控部
  261. test_api "混合搜索-风控部" \
  262. "POST" \
  263. "/api/v1/search/hybrid" \
  264. '{
  265. "query": "风险",
  266. "topK": 5,
  267. "filters": {
  268. "belong_department": "风控部"
  269. }
  270. }' \
  271. "200"
  272. # 7.4 按标签过滤 - 包含"合规"标签
  273. test_api "混合搜索-合规标签" \
  274. "POST" \
  275. "/api/v1/search/hybrid" \
  276. '{
  277. "query": "报告",
  278. "topK": 5,
  279. "filters": {
  280. "tags": "合规"
  281. }
  282. }' \
  283. "200"
  284. # 7.5 多条件过滤 - 信贷部 + 贷款合同
  285. test_api "混合搜索-多条件过滤" \
  286. "POST" \
  287. "/api/v1/search/hybrid" \
  288. '{
  289. "query": "贷款",
  290. "topK": 5,
  291. "filters": {
  292. "belong_department": "信贷部",
  293. "document_type": "贷款合同"
  294. }
  295. }' \
  296. "200"
  297. # ========================================================================
  298. # 8. 查询文档测试
  299. # ========================================================================
  300. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  301. echo "8. 查询文档测试" | tee -a "$RESULTS_FILE"
  302. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  303. test_api "查询-银行流水" \
  304. "GET" \
  305. "/api/v1/documents/bank-flow-001" \
  306. "" \
  307. "200"
  308. test_api "查询-操作手册" \
  309. "GET" \
  310. "/api/v1/documents/manual-001" \
  311. "" \
  312. "200"
  313. test_api "查询-不存在的文档" \
  314. "GET" \
  315. "/api/v1/documents/not-exist-doc" \
  316. "" \
  317. "200"
  318. # ========================================================================
  319. # 9. 验证返回格式 - metadata是否只包含业务字段
  320. # ========================================================================
  321. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  322. echo "9. 验证返回格式" | tee -a "$RESULTS_FILE"
  323. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  324. echo "验证:搜索结果中metadata是否只包含业务扩展字段..." | tee -a "$RESULTS_FILE"
  325. response=$(curl -s -X POST "$BASE_URL/api/v1/search" \
  326. -H "Content-Type: application/json" \
  327. -d '{"query": "银行", "topK": 1}')
  328. # 检查返回的metadata是否包含公共字段
  329. if echo "$response" | grep -q '"file_path"'; then
  330. echo -e "${RED}FAILED${NC}: metadata中包含公共字段file_path" | tee -a "$RESULTS_FILE"
  331. ((FAILED++))
  332. else
  333. echo -e "${GREEN}PASSED${NC}: metadata中无公共字段" | tee -a "$RESULTS_FILE"
  334. ((PASSED++))
  335. fi
  336. echo "验证:搜索结果是否包含score字段..." | tee -a "$RESULTS_FILE"
  337. if echo "$response" | grep -q '"score"'; then
  338. echo -e "${GREEN}PASSED${NC}: 返回结果包含score" | tee -a "$RESULTS_FILE"
  339. ((PASSED++))
  340. else
  341. echo -e "${RED}FAILED${NC}: 返回结果缺少score" | tee -a "$RESULTS_FILE"
  342. ((FAILED++))
  343. fi
  344. echo "验证:搜索结果是否不包含embedding..." | tee -a "$RESULTS_FILE"
  345. if echo "$response" | grep -q '"embedding"'; then
  346. echo -e "${RED}FAILED${NC}: 返回结果包含embedding" | tee -a "$RESULTS_FILE"
  347. ((FAILED++))
  348. else
  349. echo -e "${GREEN}PASSED${NC}: 返回结果不包含embedding" | tee -a "$RESULTS_FILE"
  350. ((PASSED++))
  351. fi
  352. echo "" | tee -a "$RESULTS_FILE"
  353. # ========================================================================
  354. # 10. 清理测试数据
  355. # ========================================================================
  356. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  357. echo "10. 清理测试数据" | tee -a "$RESULTS_FILE"
  358. echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" | tee -a "$RESULTS_FILE"
  359. test_api "清理-银行流水" \
  360. "DELETE" \
  361. "/api/v1/documents/bank-flow-001" \
  362. "" \
  363. "200"
  364. test_api "清理-操作手册" \
  365. "DELETE" \
  366. "/api/v1/documents/manual-001" \
  367. "" \
  368. "200"
  369. test_api "清理-批量文档" \
  370. "DELETE" \
  371. "/api/v1/documents/contract-loan-test" \
  372. "" \
  373. "200"
  374. test_api "清理-审计报告" \
  375. "DELETE" \
  376. "/api/v1/documents/audit-report-001" \
  377. "" \
  378. "200"
  379. test_api "清理-风险评估" \
  380. "DELETE" \
  381. "/api/v1/documents/risk-assessment-001" \
  382. "" \
  383. "200"
  384. # ========================================================================
  385. # 测试总结
  386. # ========================================================================
  387. echo "========================================" | tee -a "$RESULTS_FILE"
  388. echo " 测试总结" | tee -a "$RESULTS_FILE"
  389. echo "========================================" | tee -a "$RESULTS_FILE"
  390. TOTAL=$((PASSED + FAILED))
  391. echo "总用例数: $TOTAL" | tee -a "$RESULTS_FILE"
  392. echo -e "通过: ${GREEN}$PASSED${NC}" | tee -a "$RESULTS_FILE"
  393. echo -e "失败: ${RED}$FAILED${NC}" | tee -a "$RESULTS_FILE"
  394. if [ $TOTAL -gt 0 ]; then
  395. PASS_RATE=$((PASSED * 100 / TOTAL))
  396. echo "通过率: $PASS_RATE%" | tee -a "$RESULTS_FILE"
  397. fi
  398. echo "" | tee -a "$RESULTS_FILE"
  399. echo "测试内容说明:" | tee -a "$RESULTS_FILE"
  400. echo "1. 入库测试:银行流水、操作手册、批量多类型文档" | tee -a "$RESULTS_FILE"
  401. echo "2. 向量搜索:按语义搜索不同业务内容" | tee -a "$RESULTS_FILE"
  402. echo "3. 混合搜索:按业务主题、文档类型、部门、标签过滤" | tee -a "$RESULTS_FILE"
  403. echo "4. 查询测试:查询存在/不存在文档" | tee -a "$RESULTS_FILE"
  404. echo "5. 格式验证:metadata不包含公共字段、无embedding" | tee -a "$RESULTS_FILE"
  405. echo "" | tee -a "$RESULTS_FILE"
  406. if [ $FAILED -eq 0 ]; then
  407. echo -e "${GREEN}✓ 所有测试通过!${NC}" | tee -a "$RESULTS_FILE"
  408. exit 0
  409. else
  410. echo -e "${RED}✗ 有 $FAILED 个测试失败${NC}" | tee -a "$RESULTS_FILE"
  411. exit 1
  412. fi