#!/bin/bash
#
# schedule-embedding-api end-to-end functional test script.
#
# Scenario: indexes the sample documents taken from the "parse result
# examples" material, runs vector and hybrid searches against them, fetches
# them by id, checks the shape of a search response and finally deletes them.
#
# Usage: make sure the application is running (default
# http://localhost:8084), then run this script. Both BASE_URL and
# RESULTS_FILE may be overridden from the environment.
#
# Exit status: 0 when every check passed, 1 otherwise.

# No `set -e`: a failing request must be counted, not abort the whole run.
set -u

readonly BASE_URL="${BASE_URL:-http://localhost:8084}"
readonly RESULTS_FILE="${RESULTS_FILE:-complete_test_results.txt}"
PASSED=0
FAILED=0

# Colour output. ANSI-C quoting stores the real escape bytes, so messages can
# be printed with a plain '%s' and no data is ever re-interpreted for
# backslash escapes.
readonly GREEN=$'\033[0;32m'
readonly RED=$'\033[0;31m'
# shellcheck disable=SC2034  # kept for parity with the original palette
readonly YELLOW=$'\033[1;33m'
# shellcheck disable=SC2034  # kept for parity with the original palette
readonly BLUE=$'\033[0;34m'
readonly NC=$'\033[0m'

readonly RULE_HEAVY="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
readonly RULE_DOUBLE="========================================"

#######################################
# Print one line to stdout and append it to the results file.
# Globals:   RESULTS_FILE (read)
# Arguments: $1 - text to print (may be empty for a blank line)
#######################################
log() {
  printf '%s\n' "${1-}" | tee -a "$RESULTS_FILE"
}

#######################################
# Print a section banner: title framed by two heavy rules.
# Globals:   RULE_HEAVY (read)
# Arguments: $1 - section title
#######################################
section() {
  log "$RULE_HEAVY"
  log "$1"
  log "$RULE_HEAVY"
}

#######################################
# Record a passed / failed ad-hoc check and bump the matching counter.
# Globals:   PASSED / FAILED (written), GREEN, RED, NC (read)
# Arguments: $1 - detail message printed after the verdict
#######################################
record_pass() {
  log "${GREEN}PASSED${NC}: $1"
  PASSED=$((PASSED + 1))
}

record_fail() {
  log "${RED}FAILED${NC}: $1"
  FAILED=$((FAILED + 1))
}

#######################################
# Issue one HTTP request and compare the status code with the expectation.
# Globals:   BASE_URL, RESULTS_FILE, GREEN, RED, NC (read)
#            PASSED, FAILED (written)
# Arguments: $1 - test name shown in the report
#            $2 - HTTP method: GET, DELETE or POST
#            $3 - URL path appended to BASE_URL
#            $4 - JSON request body (POST only; may be empty)
#            $5 - expected HTTP status code
# Outputs:   verdict line (and the body when it looks like a result payload)
#            to stdout and the results file
# Returns:   always 0; the outcome is reported through the counters
#######################################
test_api() {
  local test_name="$1"
  local method="$2"
  local url="$3"
  local data="${4:-}"
  local expected_code="${5:-}"
  # Locals so that a previous call's response can never leak into this one.
  local response http_code body
  local -a curl_args

  printf '[%s] ... ' "$test_name" | tee -a "$RESULTS_FILE"

  # curl itself expands "\n" and %{http_code} in the -w format, so the status
  # code always ends up on the last line of the captured output.
  curl_args=(-s -w '\n%{http_code}' -X "$method")
  case "$method" in
    GET | DELETE) ;;
    POST)
      curl_args+=(-H "Content-Type: application/json")
      if [[ -n "$data" ]]; then
        curl_args+=(-d "$data")
      fi
      ;;
    *)
      # Previously an unknown method silently reused the last response.
      log "${RED}FAILED${NC} (unsupported method: $method)"
      FAILED=$((FAILED + 1))
      log ""
      return 0
      ;;
  esac

  response=$(curl "${curl_args[@]}" "$BASE_URL$url")

  http_code=$(tail -n1 <<<"$response")
  body=$(sed '$d' <<<"$response")

  if [[ "$http_code" == "$expected_code" ]]; then
    log "${GREEN}PASSED${NC}"
    PASSED=$((PASSED + 1))
  else
    log "${RED}FAILED${NC} (HTTP $http_code)"
    FAILED=$((FAILED + 1))
  fi

  # Show the key result when the body looks like a result payload.
  if [[ "$body" == *"success"* || "$body" == *"chunkCount"* || "$body" == *"score"* ]]; then
    log " -> $body"
  fi

  log ""
  return 0
}

# Truncate the results file.
: > "$RESULTS_FILE"

log "$RULE_DOUBLE"
log " 完整功能测试 - 解析结果示例数据"
log " 测试时间: $(date '+%Y-%m-%d %H:%M:%S')"
log "$RULE_DOUBLE"
log ""

# ========================================================================
# 1. Health check
# ========================================================================
section "1. 健康检查"

test_api "健康检查" "GET" "/actuator/health" "" "200"

# ========================================================================
# 2. Single document indexing - bank statement sample
# ========================================================================
section "2. 单个文档入库 - 银行流水"

test_api "入库-银行流水" \
  "POST" \
  "/api/v1/documents/index" \
  '{
    "docId": "bank-flow-001",
    "fileName": "111111133.png",
    "fullText": "账户明细\n\n账号:09-75******6775 户名:上海***有限公司 币种:人民币\n\n中国农业银行\n\n明细回单\n\n专用章\n\n日期:2023年01月05日-2023年04月16日\n\n交易时间 | 金额 | 余额 | 交易用途 | 交易对方\n2023/01/05 09:15:00 | 320 | 320 | 水稻销售收入(优质粳稻) | 金穗粮食贸易公司\n2023/01/08 10:45:00 | -150 | 170 | 化肥采购(复合肥) | 丰收农资连锁\n2023/01/12 14:20:00 | 280 | 450 | 蔬菜销售款(西红柿/黄瓜) | 绿源农产品公司\n2023/01/15 11:30:00 | -120 | 330 | 农机维修保养 | 北方农机服务\n2023/01/18 15:50:00 | 180 | 510 | 生猪出栏销售收入 | 双汇食品集团\n2023/01/22 09:25:00 | -80 | 430 | 种子采购(玉米种子) | 中种农业科技\n2023/01/25 13:10:00 | 250 | 680 | 中药材销售(黄芪) | 同仁堂药业\n2023/01/28 16:35:00 | -60 | 620 | 饲料采购 | 正大饲料公司\n2023/02/01 08:40:00 | 150 | 770 | 农资返利结算 | 丰收农资连锁\n2023/02/05 10:15:00 | -90 | 680 | 土地流转费支付 | 村委会\n2023/02/08 14:00:00 | 220 | 900 | 农产品冷链运输服务费 | 顺丰冷链物流\n2023/02/12 11:20:00 | -75 | 825 | 员工薪酬发放 | 员工个人账户\n2023/02/15 15:30:00 | 190 | 1015 | 蔬菜零收款 | 社区生鲜店\n2023/02/18 09:45:00 | -110 | 905 | 农业保险续保 | 太平洋保险\n2023/02/22 13:25:00 | 270 | 1175 | 种粮补贴收入 | 市农业农村局",
    "filePath": "/data/files/20260313_8f9e7d6c5b4a.png",
    "fileSize": 102400,
    "fileType": "png",
    "metadata": {
      "file_unique_id": "20260313_8f9e7d6c5b4a",
      "business_topic": "金融-信贷审批",
      "document_type": "银行流水",
      "belong_department": "风控部",
      "tags": ["信贷", "流水", "合规", "农业"]
    }
  }' \
  "200"

# ========================================================================
# 3. Single document indexing - operation manual
# ========================================================================
section "3. 单个文档入库 - 操作手册"

test_api "入库-操作手册" \
  "POST" \
  "/api/v1/documents/index" \
  '{
    "docId": "manual-001",
    "fileName": "宇信科技管理数智化星云平台-操作手册.docx",
    "fullText": "宇信科技管理数智化星云平台 快速入门操作手册\n\n北京宇信科技集团股份有限公司\n\n产品概述\n本平台是针对宇信科技管理数智化星云平台的快速入门手册,涵盖从系统初始操作,建立账号信息,建立个人工作台到进行数据分析的完整流程。\n\n系统初始配置\n系统管理员需对平台进行初始设置,包括:\n1. 公共参数维护:逻辑系统管理、控制点管理、系统参数、数据字典配置\n2. 用户建立及权限划分:机构管理、部门管理、岗位管理、角色管理、用户管理\n3. 应用维护及授权\n4. 业务流程定义\n\n用户权限管理\n系统支持基于角色的权限控制,不同角色拥有不同的功能权限和数据权限。角色类型包括:系统管理人员、业务人员、业务管理人员。\n\n数据准备与指标管理\n业务人员可通过数据源配置、数据集管理、维度管理等功能准备数据,创建根指标、组合指标、派生指标,并进行指标测试和上线管理。\n\n经营分析看板\n支持通过指标配置经营分析看板,包括普通模式和专业模式两种类型,可进行看板配置、公开、授权和查看。",
    "filePath": "/data/files/20260313_8f9e7d6c5b4a123123.docx",
    "fileSize": 2048000,
    "fileType": "docx",
    "metadata": {
      "file_unique_id": "20260313_8f9e7d6c5b4a1123",
      "business_topic": "金融-系统文件",
      "document_type": "操作手册",
      "belong_department": "产品部",
      "tags": ["金融", "企管", "星云", "操作手册"]
    }
  }' \
  "200"

# ========================================================================
# 4. Batch document indexing - several document types
# ========================================================================
section "4. 批量文档入库 - 多类型文档"

test_api "批量入库-3个文档" \
  "POST" \
  "/api/v1/documents/batch-index" \
  '{
    "items": [
      {
        "docId": "contract-loan-test",
        "fileName": "贷款合同.pdf",
        "fullText": "贷款合同范本,主要条款包括贷款金额、利率、还款方式、担保条款等。甲方为贷款人,乙方为借款人。合同约定贷款金额为人民币500万元,期限36个月,按等额本息方式还款。",
        "filePath": "/data/contracts/loan001.pdf",
        "fileSize": 512000,
        "fileType": "pdf",
        "metadata": {
          "business_topic": "金融-信贷审批",
          "document_type": "贷款合同",
          "belong_department": "信贷部",
          "tags": ["贷款", "合同", "信贷"]
        }
      },
      {
        "docId": "audit-report-001",
        "fileName": "审计报告2025.pdf",
        "fullText": "年度审计报告,对公司2024年度财务状况进行审计。审计范围包括资产负债表、利润表、现金流量表等财务报表,以及内部控制制度的有效性。审计意见为标准无保留意见。",
        "filePath": "/data/reports/audit2025.pdf",
        "fileSize": 1024000,
        "fileType": "pdf",
        "metadata": {
          "business_topic": "金融-审计",
          "document_type": "审计报告",
          "belong_department": "审计部",
          "tags": ["审计", "财务", "合规"]
        }
      },
      {
        "docId": "risk-assessment-001",
        "fileName": "风险评估报告.pdf",
        "fullText": "企业风险评估报告,评估了公司的信用风险、市场风险、操作风险和合规风险。根据评估结果,建议加强贷前调查和贷后管理,完善内部控制制度。",
        "filePath": "/data/reports/risk001.pdf",
        "fileSize": 768000,
        "fileType": "pdf",
        "metadata": {
          "business_topic": "金融-风险管理",
          "document_type": "风险评估报告",
          "belong_department": "风控部",
          "tags": ["风险", "评估", "合规"]
        }
      }
    ]
  }' \
  "200"

# ========================================================================
# 5. Wait for indexing
# ========================================================================
section "5. 等待文档索引完成..."
# Fixed pause before searching; presumably gives the service time to finish
# indexing - TODO confirm 3 seconds is sufficient for the target environment.
sleep 3

# ========================================================================
# 6. Vector search tests - semantic queries
# ========================================================================
section "6. 向量搜索 - 语义搜索测试"

# 6.1 Search for bank statement content
test_api "向量搜索-银行流水" \
  "POST" \
  "/api/v1/search" \
  '{
    "query": "农业银行账户交易明细流水",
    "topK": 3
  }' \
  "200"

# 6.2 Search for operation manual content
test_api "向量搜索-系统操作" \
  "POST" \
  "/api/v1/search" \
  '{
    "query": "如何设置用户权限和角色管理",
    "topK": 3
  }' \
  "200"

# 6.3 Search for loan contract content
test_api "向量搜索-贷款合同" \
  "POST" \
  "/api/v1/search" \
  '{
    "query": "贷款合同还款方式和担保条款",
    "topK": 3
  }' \
  "200"

# 6.4 Search for audit content
test_api "向量搜索-审计报告" \
  "POST" \
  "/api/v1/search" \
  '{
    "query": "财务审计年度报表",
    "topK": 3
  }' \
  "200"

# 6.5 Search for risk-control content
test_api "向量搜索-风险评估" \
  "POST" \
  "/api/v1/search" \
  '{
    "query": "企业信用风险评估",
    "topK": 3
  }' \
  "200"

# ========================================================================
# 7. Hybrid search tests - filtering by business dimensions
# ========================================================================
section "7. 混合搜索 - 业务维度过滤"

# 7.1 Filter by business topic - credit approval
test_api "混合搜索-信贷审批主题" \
  "POST" \
  "/api/v1/search/hybrid" \
  '{
    "query": "合同条款",
    "topK": 5,
    "filters": {
      "business_topic": "金融-信贷审批"
    }
  }' \
  "200"

# 7.2 Filter by document type - loan contract
test_api "混合搜索-贷款合同类型" \
  "POST" \
  "/api/v1/search/hybrid" \
  '{
    "query": "贷款",
    "topK": 5,
    "filters": {
      "document_type": "贷款合同"
    }
  }' \
  "200"

# 7.3 Filter by department - risk control
test_api "混合搜索-风控部" \
  "POST" \
  "/api/v1/search/hybrid" \
  '{
    "query": "风险",
    "topK": 5,
    "filters": {
      "belong_department": "风控部"
    }
  }' \
  "200"

# 7.4 Filter by tag - contains the "合规" (compliance) tag
test_api "混合搜索-合规标签" \
  "POST" \
  "/api/v1/search/hybrid" \
  '{
    "query": "报告",
    "topK": 5,
    "filters": {
      "tags": "合规"
    }
  }' \
  "200"

# 7.5 Multiple filters - credit department + loan contract
test_api "混合搜索-多条件过滤" \
  "POST" \
  "/api/v1/search/hybrid" \
  '{
    "query": "贷款",
    "topK": 5,
    "filters": {
      "belong_department": "信贷部",
      "document_type": "贷款合同"
    }
  }' \
  "200"

# ========================================================================
# 8. Document lookup tests
# ========================================================================
section "8. 查询文档测试"

test_api "查询-银行流水" \
  "GET" \
  "/api/v1/documents/bank-flow-001" \
  "" \
  "200"

test_api "查询-操作手册" \
  "GET" \
  "/api/v1/documents/manual-001" \
  "" \
  "200"

# NOTE(review): a missing document is expected to answer 200 here, not 404 -
# presumably the API wraps "not found" in a 200 envelope; confirm against the
# service contract.
test_api "查询-不存在的文档" \
  "GET" \
  "/api/v1/documents/not-exist-doc" \
  "" \
  "200"

# ========================================================================
# 9. Response format validation - metadata must only hold business fields
# ========================================================================
section "9. 验证返回格式"

log "验证:搜索结果中metadata是否只包含业务扩展字段..."
search_response=$(curl -s -X POST "$BASE_URL/api/v1/search" \
  -H "Content-Type: application/json" \
  -d '{"query": "银行", "topK": 1}')

# The response must not expose the common field file_path. An empty response
# (service down, request failed) proves nothing, so it counts as a failure
# instead of a vacuous pass.
if [[ -z "$search_response" ]]; then
  record_fail "搜索接口无响应,无法验证metadata公共字段"
elif grep -q '"file_path"' <<<"$search_response"; then
  record_fail "metadata中包含公共字段file_path"
else
  record_pass "metadata中无公共字段"
fi

log "验证:搜索结果是否包含score字段..."
if grep -q '"score"' <<<"$search_response"; then
  record_pass "返回结果包含score"
else
  record_fail "返回结果缺少score"
fi

log "验证:搜索结果是否不包含embedding..."
if [[ -z "$search_response" ]]; then
  record_fail "搜索接口无响应,无法验证embedding"
elif grep -q '"embedding"' <<<"$search_response"; then
  record_fail "返回结果包含embedding"
else
  record_pass "返回结果不包含embedding"
fi

log ""

# ========================================================================
# 10. Clean up test data
# ========================================================================
section "10. 清理测试数据"

test_api "清理-银行流水" \
  "DELETE" \
  "/api/v1/documents/bank-flow-001" \
  "" \
  "200"

test_api "清理-操作手册" \
  "DELETE" \
  "/api/v1/documents/manual-001" \
  "" \
  "200"

test_api "清理-批量文档" \
  "DELETE" \
  "/api/v1/documents/contract-loan-test" \
  "" \
  "200"

test_api "清理-审计报告" \
  "DELETE" \
  "/api/v1/documents/audit-report-001" \
  "" \
  "200"

test_api "清理-风险评估" \
  "DELETE" \
  "/api/v1/documents/risk-assessment-001" \
  "" \
  "200"

# ========================================================================
# Test summary
# ========================================================================
log "$RULE_DOUBLE"
log " 测试总结"
log "$RULE_DOUBLE"

TOTAL=$((PASSED + FAILED))
log "总用例数: $TOTAL"
log "通过: ${GREEN}$PASSED${NC}"
log "失败: ${RED}$FAILED${NC}"

# Guard against division by zero when nothing was executed.
if ((TOTAL > 0)); then
  PASS_RATE=$((PASSED * 100 / TOTAL))
  log "通过率: $PASS_RATE%"
fi

log ""
log "测试内容说明:"
log "1. 入库测试:银行流水、操作手册、批量多类型文档"
log "2. 向量搜索:按语义搜索不同业务内容"
log "3. 混合搜索:按业务主题、文档类型、部门、标签过滤"
log "4. 查询测试:查询存在/不存在文档"
log "5. 格式验证:metadata不包含公共字段、无embedding"
log ""

if ((FAILED == 0)); then
  log "${GREEN}✓ 所有测试通过!${NC}"
  exit 0
else
  log "${RED}✗ 有 $FAILED 个测试失败${NC}"
  exit 1
fi