| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
#!/bin/bash
# Full functional test script for schedule-embedding-api.
# Test scenario: based on the material in 解析结果示例.md.
# Usage: make sure the application is running at http://localhost:8084,
# or export BASE_URL (and optionally RESULTS_FILE) before running to
# target a different server / results file.

# Both settings keep their historical defaults but may be overridden
# from the environment.
BASE_URL="${BASE_URL:-http://localhost:8084}"
RESULTS_FILE="${RESULTS_FILE:-complete_test_results.txt}"

# Running counters of passed / failed checks.
PASSED=0
FAILED=0

# ANSI colour escape sequences (interpreted by `echo -e`).
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Truncate the results file so every run starts from an empty report.
: > "$RESULTS_FILE"

# Report header, written both to the terminal and to the results file.
{
  echo "========================================"
  echo " 完整功能测试 - 解析结果示例数据"
  echo " 测试时间: $(date '+%Y-%m-%d %H:%M:%S')"
  echo "========================================"
  echo ""
} | tee -a "$RESULTS_FILE"
# Test helper.
#######################################
# Sends one HTTP request to the API under test, compares the returned
# HTTP status with the expected one and records the outcome.
# Globals:
#   BASE_URL      (read)     prefix prepended to the request path
#   RESULTS_FILE  (appended) every line printed is also written here
#   GREEN, RED, NC (read)    colour escape sequences
#   PASSED, FAILED (written) incremented according to the outcome
# Arguments:
#   $1 - test name shown in the report
#   $2 - HTTP method: GET, DELETE, POST, PUT or PATCH
#   $3 - request path, appended to BASE_URL
#   $4 - JSON request body (may be empty; only sent for POST/PUT/PATCH)
#   $5 - expected HTTP status code
# Outputs:
#   PASSED/FAILED line, plus the response body when it contains
#   "success", "chunkCount" or "score".
# Returns:
#   0 always; the outcome is reported through PASSED / FAILED.
#######################################
test_api() {
  local test_name="$1"
  local method="$2"
  local url="$3"
  local data="$4"
  local expected_code="$5"
  # Locals so that a previous call's response can never be re-evaluated.
  local response http_code body
  # curl appends "\n<status>" after the body so both can be captured at once.
  local -a curl_args=(-s -w '\n%{http_code}' -X "$method")

  echo -n "[$test_name] ... " | tee -a "$RESULTS_FILE"

  case "$method" in
    GET | DELETE)
      ;;
    POST | PUT | PATCH)
      curl_args+=(-H "Content-Type: application/json")
      if [[ -n "$data" ]]; then
        curl_args+=(-d "$data")
      fi
      ;;
    *)
      # No request can be made; count it as a failure instead of
      # silently judging whatever response happened to be left over.
      echo -e "${RED}FAILED${NC} (unsupported method: $method)" | tee -a "$RESULTS_FILE"
      FAILED=$((FAILED + 1))
      echo "" | tee -a "$RESULTS_FILE"
      return 0
      ;;
  esac

  response=$(curl "${curl_args[@]}" "$BASE_URL$url")

  # The last line is the status code; everything before it is the body.
  http_code=${response##*$'\n'}
  if [[ "$response" == *$'\n'* ]]; then
    body=${response%$'\n'*}
  else
    body=""
  fi

  if [[ "$http_code" == "$expected_code" ]]; then
    echo -e "${GREEN}PASSED${NC}" | tee -a "$RESULTS_FILE"
    PASSED=$((PASSED + 1))
  else
    echo -e "${RED}FAILED${NC} (HTTP $http_code)" | tee -a "$RESULTS_FILE"
    FAILED=$((FAILED + 1))
  fi

  # Show the body only when it carries one of the key result markers.
  if [[ "$body" == *"success"* || "$body" == *"chunkCount"* || "$body" == *"score"* ]]; then
    echo " -> $body" | tee -a "$RESULTS_FILE"
  fi
  echo "" | tee -a "$RESULTS_FILE"
}
# ========================================================================
# 1. Health check
# ========================================================================
# Section banner goes to the terminal and the results file in one write.
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "1. 健康检查"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# The actuator health endpoint is expected to answer with HTTP 200.
test_api "健康检查" "GET" "/actuator/health" "" "200"
# ========================================================================
# 2. Single-document indexing - bank statement sample
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "2. 单个文档入库 - 银行流水"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# Request body kept in a quoted heredoc so nothing inside it is expanded.
# `read -d ''` returns non-zero at end of input; that is expected here.
read -r -d '' bank_flow_payload <<'JSON' || true
{
  "docId": "bank-flow-001",
  "fileName": "111111133.png",
  "fullText": "账户明细\n\n账号:09-75******6775 户名:上海***有限公司 币种:人民币\n\n中国农业银行\n\n明细回单\n\n专用章\n\n日期:2023年01月05日-2023年04月16日\n\n交易时间 | 金额 | 余额 | 交易用途 | 交易对方\n2023/01/05 09:15:00 | 320 | 320 | 水稻销售收入(优质粳稻) | 金穗粮食贸易公司\n2023/01/08 10:45:00 | -150 | 170 | 化肥采购(复合肥) | 丰收农资连锁\n2023/01/12 14:20:00 | 280 | 450 | 蔬菜销售款(西红柿/黄瓜) | 绿源农产品公司\n2023/01/15 11:30:00 | -120 | 330 | 农机维修保养 | 北方农机服务\n2023/01/18 15:50:00 | 180 | 510 | 生猪出栏销售收入 | 双汇食品集团\n2023/01/22 09:25:00 | -80 | 430 | 种子采购(玉米种子) | 中种农业科技\n2023/01/25 13:10:00 | 250 | 680 | 中药材销售(黄芪) | 同仁堂药业\n2023/01/28 16:35:00 | -60 | 620 | 饲料采购 | 正大饲料公司\n2023/02/01 08:40:00 | 150 | 770 | 农资返利结算 | 丰收农资连锁\n2023/02/05 10:15:00 | -90 | 680 | 土地流转费支付 | 村委会\n2023/02/08 14:00:00 | 220 | 900 | 农产品冷链运输服务费 | 顺丰冷链物流\n2023/02/12 11:20:00 | -75 | 825 | 员工薪酬发放 | 员工个人账户\n2023/02/15 15:30:00 | 190 | 1015 | 蔬菜零收款 | 社区生鲜店\n2023/02/18 09:45:00 | -110 | 905 | 农业保险续保 | 太平洋保险\n2023/02/22 13:25:00 | 270 | 1175 | 种粮补贴收入 | 市农业农村局",
  "filePath": "/data/files/20260313_8f9e7d6c5b4a.png",
  "fileSize": 102400,
  "fileType": "png",
  "metadata": {
    "file_unique_id": "20260313_8f9e7d6c5b4a",
    "business_topic": "金融-信贷审批",
    "document_type": "银行流水",
    "belong_department": "风控部",
    "tags": ["信贷", "流水", "合规", "农业"]
  }
}
JSON

test_api "入库-银行流水" "POST" "/api/v1/documents/index" "$bank_flow_payload" "200"
# ========================================================================
# 3. Single-document indexing - operation manual
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "3. 单个文档入库 - 操作手册"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# Request body kept in a quoted heredoc so nothing inside it is expanded.
# `read -d ''` returns non-zero at end of input; that is expected here.
read -r -d '' manual_payload <<'JSON' || true
{
  "docId": "manual-001",
  "fileName": "宇信科技管理数智化星云平台-操作手册.docx",
  "fullText": "宇信科技管理数智化星云平台 快速入门操作手册\n\n北京宇信科技集团股份有限公司\n\n产品概述\n本平台是针对宇信科技管理数智化星云平台的快速入门手册,涵盖从系统初始操作,建立账号信息,建立个人工作台到进行数据分析的完整流程。\n\n系统初始配置\n系统管理员需对平台进行初始设置,包括:\n1. 公共参数维护:逻辑系统管理、控制点管理、系统参数、数据字典配置\n2. 用户建立及权限划分:机构管理、部门管理、岗位管理、角色管理、用户管理\n3. 应用维护及授权\n4. 业务流程定义\n\n用户权限管理\n系统支持基于角色的权限控制,不同角色拥有不同的功能权限和数据权限。角色类型包括:系统管理人员、业务人员、业务管理人员。\n\n数据准备与指标管理\n业务人员可通过数据源配置、数据集管理、维度管理等功能准备数据,创建根指标、组合指标、派生指标,并进行指标测试和上线管理。\n\n经营分析看板\n支持通过指标配置经营分析看板,包括普通模式和专业模式两种类型,可进行看板配置、公开、授权和查看。",
  "filePath": "/data/files/20260313_8f9e7d6c5b4a123123.docx",
  "fileSize": 2048000,
  "fileType": "docx",
  "metadata": {
    "file_unique_id": "20260313_8f9e7d6c5b4a1123",
    "business_topic": "金融-系统文件",
    "document_type": "操作手册",
    "belong_department": "产品部",
    "tags": ["金融", "企管", "星云", "操作手册"]
  }
}
JSON

test_api "入库-操作手册" "POST" "/api/v1/documents/index" "$manual_payload" "200"
# ========================================================================
# 4. Batch indexing - documents of several types
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "4. 批量文档入库 - 多类型文档"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# Three documents (loan contract, audit report, risk assessment) sent in
# one request. `read -d ''` returns non-zero at end of input; expected.
read -r -d '' batch_payload <<'JSON' || true
{
  "items": [
    {
      "docId": "contract-loan-test",
      "fileName": "贷款合同.pdf",
      "fullText": "贷款合同范本,主要条款包括贷款金额、利率、还款方式、担保条款等。甲方为贷款人,乙方为借款人。合同约定贷款金额为人民币500万元,期限36个月,按等额本息方式还款。",
      "filePath": "/data/contracts/loan001.pdf",
      "fileSize": 512000,
      "fileType": "pdf",
      "metadata": {
        "business_topic": "金融-信贷审批",
        "document_type": "贷款合同",
        "belong_department": "信贷部",
        "tags": ["贷款", "合同", "信贷"]
      }
    },
    {
      "docId": "audit-report-001",
      "fileName": "审计报告2025.pdf",
      "fullText": "年度审计报告,对公司2024年度财务状况进行审计。审计范围包括资产负债表、利润表、现金流量表等财务报表,以及内部控制制度的有效性。审计意见为标准无保留意见。",
      "filePath": "/data/reports/audit2025.pdf",
      "fileSize": 1024000,
      "fileType": "pdf",
      "metadata": {
        "business_topic": "金融-审计",
        "document_type": "审计报告",
        "belong_department": "审计部",
        "tags": ["审计", "财务", "合规"]
      }
    },
    {
      "docId": "risk-assessment-001",
      "fileName": "风险评估报告.pdf",
      "fullText": "企业风险评估报告,评估了公司的信用风险、市场风险、操作风险和合规风险。根据评估结果,建议加强贷前调查和贷后管理,完善内部控制制度。",
      "filePath": "/data/reports/risk001.pdf",
      "fileSize": 768000,
      "fileType": "pdf",
      "metadata": {
        "business_topic": "金融-风险管理",
        "document_type": "风险评估报告",
        "belong_department": "风控部",
        "tags": ["风险", "评估", "合规"]
      }
    }
  ]
}
JSON

test_api "批量入库-3个文档" "POST" "/api/v1/documents/batch-index" "$batch_payload" "200"
# ========================================================================
# 5. Wait for indexing
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "5. 等待文档索引完成..."
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# Fixed pause before the search tests run.
# NOTE(review): presumably gives the backend time to make the documents
# searchable - confirm that 3 seconds is sufficient.
sleep 3
# ========================================================================
# 6. Vector search tests - semantic queries
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "6. 向量搜索 - 语义搜索测试"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# One entry per case, formatted as "<test name>|<query text>".
semantic_cases=(
  # 6.1 bank statement content
  "向量搜索-银行流水|农业银行账户交易明细流水"
  # 6.2 operation manual content
  "向量搜索-系统操作|如何设置用户权限和角色管理"
  # 6.3 loan contract content
  "向量搜索-贷款合同|贷款合同还款方式和担保条款"
  # 6.4 audit content
  "向量搜索-审计报告|财务审计年度报表"
  # 6.5 risk-control content
  "向量搜索-风险评估|企业信用风险评估"
)

for semantic_case in "${semantic_cases[@]}"; do
  semantic_name=${semantic_case%%|*}
  semantic_query=${semantic_case#*|}
  # Every case asks for the top 3 hits and expects HTTP 200.
  printf -v semantic_payload '{\n  "query": "%s",\n  "topK": 3\n}' "$semantic_query"
  test_api "$semantic_name" "POST" "/api/v1/search" "$semantic_payload" "200"
done
# ========================================================================
# 7. Hybrid search tests - filtering by business dimension
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "7. 混合搜索 - 业务维度过滤"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# One entry per case: "<test name>|<query text>|<members of the filters object>".
hybrid_cases=(
  # 7.1 filter by business topic - credit approval
  '混合搜索-信贷审批主题|合同条款|"business_topic": "金融-信贷审批"'
  # 7.2 filter by document type - loan contract
  '混合搜索-贷款合同类型|贷款|"document_type": "贷款合同"'
  # 7.3 filter by department - risk control
  '混合搜索-风控部|风险|"belong_department": "风控部"'
  # 7.4 filter by tag - the "合规" tag
  '混合搜索-合规标签|报告|"tags": "合规"'
  # 7.5 multiple conditions - credit department + loan contract
  '混合搜索-多条件过滤|贷款|"belong_department": "信贷部", "document_type": "贷款合同"'
)

for hybrid_case in "${hybrid_cases[@]}"; do
  IFS='|' read -r hybrid_name hybrid_query hybrid_filters <<<"$hybrid_case"
  # Every case asks for the top 5 hits and expects HTTP 200.
  printf -v hybrid_payload \
    '{\n  "query": "%s",\n  "topK": 5,\n  "filters": {\n    %s\n  }\n}' \
    "$hybrid_query" "$hybrid_filters"
  test_api "$hybrid_name" "POST" "/api/v1/search/hybrid" "$hybrid_payload" "200"
done
# ========================================================================
# 8. Document lookup tests
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "8. 查询文档测试"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# One entry per case, formatted as "<test name>|<document id>".
# NOTE(review): the lookup of the non-existent document also expects
# HTTP 200 - confirm the API reports "not found" in the body rather than
# with a 404 status.
lookup_cases=(
  "查询-银行流水|bank-flow-001"
  "查询-操作手册|manual-001"
  "查询-不存在的文档|not-exist-doc"
)

for lookup_case in "${lookup_cases[@]}"; do
  test_api "${lookup_case%%|*}" "GET" "/api/v1/documents/${lookup_case#*|}" "" "200"
done
# ========================================================================
# 9. Response format checks - metadata must only carry business fields
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "9. 验证返回格式"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# One search request whose raw body is inspected by the three checks below.
response=$(curl -s -X POST "$BASE_URL/api/v1/search" \
  -H "Content-Type: application/json" \
  -d '{"query": "银行", "topK": 1}')
search_status=$?

# The "must NOT contain" checks would pass vacuously on an empty body
# (server down, curl error), so remember whether there is anything to
# inspect at all.
search_response_ok=1
if [[ "$search_status" -ne 0 || -z "$response" ]]; then
  search_response_ok=0
fi

# Check 1: the common field file_path must not appear in the result.
echo "验证:搜索结果中metadata是否只包含业务扩展字段..." | tee -a "$RESULTS_FILE"
if [[ "$search_response_ok" -eq 0 ]]; then
  echo -e "${RED}FAILED${NC}: 搜索请求失败或响应为空 (curl exit $search_status)" | tee -a "$RESULTS_FILE"
  FAILED=$((FAILED + 1))
elif grep -q '"file_path"' <<<"$response"; then
  echo -e "${RED}FAILED${NC}: metadata中包含公共字段file_path" | tee -a "$RESULTS_FILE"
  FAILED=$((FAILED + 1))
else
  echo -e "${GREEN}PASSED${NC}: metadata中无公共字段" | tee -a "$RESULTS_FILE"
  PASSED=$((PASSED + 1))
fi

# Check 2: every result must expose a score (an empty body fails naturally).
echo "验证:搜索结果是否包含score字段..." | tee -a "$RESULTS_FILE"
if grep -q '"score"' <<<"$response"; then
  echo -e "${GREEN}PASSED${NC}: 返回结果包含score" | tee -a "$RESULTS_FILE"
  PASSED=$((PASSED + 1))
else
  echo -e "${RED}FAILED${NC}: 返回结果缺少score" | tee -a "$RESULTS_FILE"
  FAILED=$((FAILED + 1))
fi

# Check 3: the embedding vector must not be returned.
echo "验证:搜索结果是否不包含embedding..." | tee -a "$RESULTS_FILE"
if [[ "$search_response_ok" -eq 0 ]]; then
  echo -e "${RED}FAILED${NC}: 搜索请求失败或响应为空 (curl exit $search_status)" | tee -a "$RESULTS_FILE"
  FAILED=$((FAILED + 1))
elif grep -q '"embedding"' <<<"$response"; then
  echo -e "${RED}FAILED${NC}: 返回结果包含embedding" | tee -a "$RESULTS_FILE"
  FAILED=$((FAILED + 1))
else
  echo -e "${GREEN}PASSED${NC}: 返回结果不包含embedding" | tee -a "$RESULTS_FILE"
  PASSED=$((PASSED + 1))
fi
echo "" | tee -a "$RESULTS_FILE"
# ========================================================================
# 10. Clean up test data
# ========================================================================
{
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "10. 清理测试数据"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
} | tee -a "$RESULTS_FILE"

# One entry per document indexed earlier: "<test name>|<document id>".
cleanup_cases=(
  "清理-银行流水|bank-flow-001"
  "清理-操作手册|manual-001"
  "清理-批量文档|contract-loan-test"
  "清理-审计报告|audit-report-001"
  "清理-风险评估|risk-assessment-001"
)

for cleanup_case in "${cleanup_cases[@]}"; do
  test_api "${cleanup_case%%|*}" "DELETE" "/api/v1/documents/${cleanup_case#*|}" "" "200"
done
# ========================================================================
# Test summary
# ========================================================================
TOTAL=$((PASSED + FAILED))

# The pass rate is an integer percentage; skipped when nothing ran so the
# division never sees a zero denominator.
if [[ "$TOTAL" -gt 0 ]]; then
  PASS_RATE=$((PASSED * 100 / TOTAL))
fi

{
  echo "========================================"
  echo " 测试总结"
  echo "========================================"
  echo "总用例数: $TOTAL"
  echo -e "通过: ${GREEN}$PASSED${NC}"
  echo -e "失败: ${RED}$FAILED${NC}"
  if [[ "$TOTAL" -gt 0 ]]; then
    echo "通过率: $PASS_RATE%"
  fi
  echo ""
  echo "测试内容说明:"
  echo "1. 入库测试:银行流水、操作手册、批量多类型文档"
  echo "2. 向量搜索:按语义搜索不同业务内容"
  echo "3. 混合搜索:按业务主题、文档类型、部门、标签过滤"
  echo "4. 查询测试:查询存在/不存在文档"
  echo "5. 格式验证:metadata不包含公共字段、无embedding"
  echo ""
} | tee -a "$RESULTS_FILE"

# The exit status tells the caller whether any check failed.
if [[ "$FAILED" -ne 0 ]]; then
  echo -e "${RED}✗ 有 $FAILED 个测试失败${NC}" | tee -a "$RESULTS_FILE"
  exit 1
fi

echo -e "${GREEN}✓ 所有测试通过!${NC}" | tee -a "$RESULTS_FILE"
exit 0
|