12 Angajamente d3a9545849 ... 0e20f6612e

Autor SHA1 Mesaj Data
  zhch158_admin 0e20f6612e feat(config): 更新默认配置,切换至 GLM-VL 模型,调整页面范围和日志文件路径 1 săptămână în urmă
  zhch158_admin 85626ae88e feat(config): 添加 YUSYS-OCR v4.0 支持,更新多个文档的 OCR 工具配置 1 săptămână în urmă
  zhch158_admin 451b26652d feat(markdown_generator): 添加印章类型支持,增强 Markdown 输出功能 1 săptămână în urmă
  zhch158_admin 90fc1b8ed4 feat(tests): 添加 GLM-OCR 适配器和 API 测试用例,验证适配器加载和 API 调用 1 săptămână în urmă
  zhch158_admin 371113b468 feat(adapter): 添加 GLM-OCR 适配器支持,增强模型适配器功能 1 săptămână în urmă
  zhch158_admin b24aaa17be fix(adapter): 修改 seal 类别为独立类别,以支持 VLM 识别 1 săptămână în urmă
  zhch158_admin cb2803c537 feat(adapter): 添加 GLM-OCR VL 识别适配器,支持表格、公式、文本和印章识别 1 săptămână în urmă
  zhch158_admin e126aaed5a feat(element_processors): 添加印章元素处理功能,支持 VLM 识别 1 săptămână în urmă
  zhch158_admin 9292deaf3d feat(config): 添加银行交易流水V4场景配置,支持多种格式和GLM-OCR识别 1 săptămână în urmă
  zhch158_admin a92691ddbb feat(config): 添加银行交易流水场景配置,支持GLM-OCR进行VL识别 1 săptămână în urmă
  zhch158_admin 1594cd3e94 feat(processor_configs): 添加yusys_ocr_v4处理器配置,支持银行对账单处理 1 săptămână în urmă
  zhch158_admin 2c1f098ff0 feat(glmocr_vllm_daemon): 添加GLM-OCR vLLM服务守护进程脚本,支持模型启动、停止和状态检查 1 săptămână în urmă
31 fișiere modificate cu 1649 adăugiri și 13 ștergeri
  1. 359 0
      ocr_tools/daemons/glmocr_vllm_daemon.sh
  2. 16 1
      ocr_tools/ocr_batch/processor_configs.yaml
  3. 113 0
      ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml
  4. 178 0
      ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml
  5. 51 0
      ocr_tools/universal_doc_parser/core/element_processors.py
  6. 8 1
      ocr_tools/universal_doc_parser/core/model_factory.py
  7. 18 0
      ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py
  8. 19 6
      ocr_tools/universal_doc_parser/main_v2.py
  9. 10 0
      ocr_tools/universal_doc_parser/models/adapters/__init__.py
  10. 413 0
      ocr_tools/universal_doc_parser/models/adapters/glmocr_vl_adapter.py
  11. 1 1
      ocr_tools/universal_doc_parser/models/adapters/pp_doclayout_v3_layout_adapter.py
  12. 188 0
      ocr_tools/universal_doc_parser/tests/test_glmocr_adapter.py
  13. 135 0
      ocr_tools/universal_doc_parser/tests/test_glmocr_api.py
  14. 20 0
      ocr_utils/json_formatters.py
  15. 13 0
      ocr_utils/markdown_generator.py
  16. 7 0
      ocr_validator/config/A用户_单元格扫描流水.yaml
  17. 7 0
      ocr_validator/config/B用户_扫描流水.yaml
  18. 0 2
      ocr_validator/config/global.yaml
  19. 7 0
      ocr_validator/config/乔_建设银行图.yaml
  20. 7 0
      ocr_validator/config/付_工商银行943825图.yaml
  21. 7 0
      ocr_validator/config/对公_招商银行图.yaml
  22. 7 0
      ocr_validator/config/山西云集科技有限公司.yaml
  23. 7 0
      ocr_validator/config/康强_北京农村商业银行.yaml
  24. 7 0
      ocr_validator/config/张_微信图.yaml
  25. 7 0
      ocr_validator/config/德_内蒙古银行照.yaml
  26. 7 0
      ocr_validator/config/提取自赤峰黄金2023年报.yaml
  27. 7 0
      ocr_validator/config/施博深.yaml
  28. 7 0
      ocr_validator/config/朱_中信银行图.yaml
  29. 7 0
      ocr_validator/config/湛_平安银行图.yaml
  30. 9 2
      ocr_validator/config/至远彩色_2023年报.yaml
  31. 7 0
      ocr_validator/config/许_民生银行图.yaml

+ 359 - 0
ocr_tools/daemons/glmocr_vllm_daemon.sh

@@ -0,0 +1,359 @@
+#!/bin/bash
+# filepath: ocr_platform/ocr_tools/daemons/glmocr_vllm_daemon.sh
+# Purpose: self-hosted vLLM service for GLM-OCR, SDK self-hosted mode
+# (config: maas.enabled=false, ocr_api points at this service).
+
+# Keep transformers compatible with vllm. Order matters: reinstall
+# transformers AFTER installing vllm (vllm may pin an older one).
+# uv pip install -U vllm --torch-backend=auto --extra-index-url https://wheels.vllm.ai/nightly
+# uv pip install -U transformers
+# GLM-OCR vLLM service daemon script.
+
+# Runtime state (PID + log) lives under a fixed user directory.
+LOGDIR="/home/ubuntu/zhch/logs"
+mkdir -p $LOGDIR
+PIDFILE="$LOGDIR/glmocr_vllm.pid"
+LOGFILE="$LOGDIR/glmocr_vllm.log"
+
+# Service configuration.
+CONDA_ENV="mineru_2_7_1"
+PORT="20036"
+HOST="0.0.0.0"
+# Local model directory (must match the self-hosted ocr_api settings in
+# config-zhch.yaml, e.g. ocr_api.api_port).
+MODEL_PATH="/home/ubuntu/models/modelscope_cache/models/ZhipuAI/GLM-OCR"
+# Alternatively a HuggingFace model id: zai-org/GLM-OCR
+SERVED_MODEL_NAME="glm-ocr"
+ALLOWED_LOCAL_MEDIA_PATH="/"
+
+# GPU configuration.
+GPU_MEMORY_UTILIZATION="0.7"
+CUDA_VISIBLE_DEVICES="7"
+# Optional: enable MTP speculative decoding for faster inference.
+ENABLE_MTP="0"
+MTP_NUM_SPECULATIVE_TOKENS="1"
+
+# Environment variables (uncomment as needed).
+# export HF_HOME="/home/ubuntu/models/hf_home"
+# export HF_ENDPOINT="https://hf-mirror.com"
+# export MODELSCOPE_CACHE="/home/ubuntu/models/modelscope_cache"
+
+# Initialize and activate the conda environment properly; fall back to
+# prepending the env's bin directory when conda.sh is not found.
+if [ -f "/home/ubuntu/anaconda3/etc/profile.d/conda.sh" ]; then
+    source /home/ubuntu/anaconda3/etc/profile.d/conda.sh
+    conda activate $CONDA_ENV
+elif [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then
+    source /opt/conda/etc/profile.d/conda.sh
+    conda activate $CONDA_ENV
+else
+    echo "Warning: Using direct conda path activation"
+    export PATH="/home/ubuntu/anaconda3/envs/$CONDA_ENV/bin:$PATH"
+fi
+
+start() {
+    # Launch the vLLM server for GLM-OCR in the background.
+    # Refuses to start when the PID recorded in $PIDFILE is still alive.
+    # Returns non-zero when a precondition fails or the process dies at startup.
+    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+        echo "GLM-OCR vLLM is already running"
+        return 1
+    fi
+
+    echo "Starting GLM-OCR vLLM daemon..."
+    echo "Host: $HOST, Port: $PORT"
+    echo "Model path: $MODEL_PATH"
+    echo "Served model name: $SERVED_MODEL_NAME"
+    echo "GPU memory utilization: $GPU_MEMORY_UTILIZATION"
+    echo "CUDA devices: $CUDA_VISIBLE_DEVICES"
+
+    # A local (absolute) model path must exist as a directory; a HuggingFace
+    # id (contains '/' but is not absolute) is passed through unchecked.
+    if [[ "$MODEL_PATH" == /* ]]; then
+        if [ ! -d "$MODEL_PATH" ]; then
+            echo "❌ Model path not found: $MODEL_PATH"
+            echo "Use a local path or HuggingFace id (e.g. zai-org/GLM-OCR). Edit MODEL_PATH in this script."
+            return 1
+        fi
+    fi
+
+    # Sanity-check the activated environment before launching.
+    if ! command -v python >/dev/null 2>&1; then
+        echo "❌ Python not found. Check conda environment activation."
+        return 1
+    fi
+
+    if ! python -c "import vllm" 2>/dev/null; then
+        echo "❌ vllm not found. Install: uv pip install -U vllm --torch-backend=auto --extra-index-url https://wheels.vllm.ai/nightly"
+        return 1
+    fi
+
+    echo "🔧 Using Python: $(which python)"
+    echo "🔧 vLLM: $(python -c 'import vllm; print(vllm.__file__)' 2>/dev/null || true)"
+
+    echo "📊 GPU 状态检查:"
+    if command -v nvidia-smi >/dev/null 2>&1; then
+        nvidia-smi --query-gpu=index,name,memory.used,memory.total --format=csv,noheader,nounits | \
+        awk -F',' '{printf "  GPU %s: %s - 内存: %sMB/%sMB\n", $1, $2, $3, $4}'
+    else
+        echo "⚠️  nvidia-smi not available"
+    fi
+
+    # Build the vllm serve argument list once; enabling MTP speculative
+    # decoding only appends one flag (previously the whole nohup command
+    # was duplicated across the two branches — a drift hazard).
+    local serve_args=(
+        --host "$HOST"
+        --port "$PORT"
+        --allowed-local-media-path "$ALLOWED_LOCAL_MEDIA_PATH"
+        --served-model-name "$SERVED_MODEL_NAME"
+        --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
+    )
+    if [ "$ENABLE_MTP" = "1" ]; then
+        serve_args+=(--speculative-config "{\"method\": \"mtp\", \"num_speculative_tokens\": $MTP_NUM_SPECULATIVE_TOKENS}")
+    fi
+
+    CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES TRANSFORMERS_USE_FAST=false \
+        nohup vllm serve "$MODEL_PATH" "${serve_args[@]}" > "$LOGFILE" 2>&1 &
+
+    echo $! > "$PIDFILE"
+    echo "✅ GLM-OCR vLLM started with PID: $(cat "$PIDFILE")"
+    echo "📋 Log file: $LOGFILE"
+    echo "🌐 Service URL: http://$HOST:$PORT"
+    echo "📖 OpenAI-compatible API: http://localhost:$PORT/v1 (chat/completions, models)"
+    echo ""
+    echo "Waiting for service to start..."
+    sleep 5
+    # Detect an immediate crash (bad model path, OOM, port clash, …)
+    # instead of reporting a stale PID as running.
+    if ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+        echo "❌ Process exited during startup. Check log: $LOGFILE"
+        rm -f "$PIDFILE"
+        return 1
+    fi
+    status
+}
+
+stop() {
+    # Stop the service recorded in $PIDFILE: SIGTERM first, wait up to
+    # 30 s for a graceful shutdown, then SIGKILL as a last resort.
+    if [ ! -f "$PIDFILE" ]; then
+        echo "GLM-OCR vLLM is not running"
+        return 1
+    fi
+
+    local pid
+    pid=$(cat "$PIDFILE")
+    echo "Stopping GLM-OCR vLLM (PID: $pid)..."
+
+    # 2>/dev/null: a stale PID file must not spew a kill error.
+    kill "$pid" 2>/dev/null
+
+    local i
+    for i in {1..30}; do
+        if ! kill -0 "$pid" 2>/dev/null; then
+            break
+        fi
+        echo "Waiting for process to stop... ($i/30)"
+        sleep 1
+    done
+
+    if kill -0 "$pid" 2>/dev/null; then
+        echo "Force killing process..."
+        kill -9 "$pid"
+    fi
+
+    rm -f "$PIDFILE"
+    echo "✅ GLM-OCR vLLM stopped"
+}
+
+status() {
+    # Report liveness, port/API reachability, GPU usage and recent log
+    # lines. Removes a stale PID file when the process is gone.
+    if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+        PID=$(cat "$PIDFILE")
+        echo "✅ GLM-OCR vLLM is running (PID: $PID)"
+        echo "🌐 Service URL: http://$HOST:$PORT"
+        echo "📋 Log file: $LOGFILE"
+
+        # Port check: prefer ss, fall back to netstat, skip if neither
+        # exists (the two former branches were copy-pasted duplicates).
+        local port_tool=""
+        if command -v ss >/dev/null 2>&1; then
+            port_tool="ss"
+        elif command -v netstat >/dev/null 2>&1; then
+            port_tool="netstat"
+        fi
+        if [ -n "$port_tool" ]; then
+            if "$port_tool" -tuln | grep -q ":$PORT "; then
+                echo "🔗 Port $PORT is being listened"
+            else
+                echo "⚠️  Port $PORT is not being listened (service may be starting up)"
+            fi
+        fi
+
+        # Cheap end-to-end probe against the OpenAI-compatible API.
+        if command -v curl >/dev/null 2>&1; then
+            if curl -s --connect-timeout 2 "http://127.0.0.1:$PORT/v1/models" > /dev/null 2>&1; then
+                echo "🎯 API 响应正常"
+            else
+                echo "⚠️  API 无响应 (service may be starting up)"
+            fi
+        fi
+
+        if command -v nvidia-smi >/dev/null 2>&1; then
+            echo "📊 GPU 使用情况:"
+            nvidia-smi --query-gpu=index,utilization.gpu,utilization.memory,memory.used,memory.total --format=csv,noheader,nounits | \
+            awk -F',' '{printf "  GPU %s: GPU利用率 %s%%, 内存利用率 %s%%, 显存 %sMB/%sMB\n", $1, $2, $3, $4, $5}'
+        fi
+
+        if [ -f "$LOGFILE" ]; then
+            echo "📄 Latest logs (last 3 lines):"
+            tail -3 "$LOGFILE" | sed 's/^/  /'
+        fi
+    else
+        echo "❌ GLM-OCR vLLM is not running"
+        if [ -f "$PIDFILE" ]; then
+            echo "Removing stale PID file..."
+            rm -f "$PIDFILE"
+        fi
+    fi
+}
+
+logs() {
+    # Follow the service log; bail out early when it does not exist yet.
+    if [ ! -f "$LOGFILE" ]; then
+        echo "❌ Log file not found: $LOGFILE"
+        return
+    fi
+    echo "📄 GLM-OCR vLLM logs:"
+    echo "====================="
+    tail -f "$LOGFILE"
+}
+
+config() {
+    # Dump the script's static configuration plus a snapshot of the
+    # runtime environment (python/vllm/conda/cuda locations, GPUs).
+    echo "📋 Current configuration:"
+    echo "  Conda Environment: $CONDA_ENV"
+    echo "  Host: $HOST"
+    echo "  Port: $PORT"
+    echo "  Model Path: $MODEL_PATH"
+    echo "  Served Model Name: $SERVED_MODEL_NAME"
+    echo "  Allowed Local Media Path: $ALLOWED_LOCAL_MEDIA_PATH"
+    echo "  GPU Memory Utilization: $GPU_MEMORY_UTILIZATION"
+    echo "  CUDA Visible Devices: $CUDA_VISIBLE_DEVICES"
+    echo "  Enable MTP: $ENABLE_MTP"
+    echo "  PID File: $PIDFILE"
+    echo "  Log File: $LOGFILE"
+
+    # Display-only directory listing; `ls | head` is fine here since the
+    # output is never parsed.
+    if [ -d "$MODEL_PATH" ]; then
+        echo "✅ Model path exists"
+        echo "  Model files:"
+        ls -la "$MODEL_PATH" | head -10 | sed 's/^/    /'
+        if [ $(ls -1 "$MODEL_PATH" 2>/dev/null | wc -l) -gt 10 ]; then
+            echo "    ... and more files"
+        fi
+    else
+        echo "❌ Model path not found (use HuggingFace id like zai-org/GLM-OCR by setting MODEL_PATH)"
+    fi
+
+    echo ""
+    echo "🔧 Environment:"
+    echo "  Python: $(which python 2>/dev/null || echo 'Not found')"
+    echo "  vLLM: $(python -c 'import vllm; print(vllm.__file__)' 2>/dev/null || echo 'Not found')"
+    echo "  Conda: $(which conda 2>/dev/null || echo 'Not found')"
+    echo "  CUDA: $(which nvcc 2>/dev/null || echo 'Not found')"
+
+    if command -v nvidia-smi >/dev/null 2>&1; then
+        echo ""
+        echo "🔥 GPU Information:"
+        nvidia-smi --query-gpu=index,name,driver_version,memory.total --format=csv,noheader,nounits | \
+        awk -F',' '{printf "  GPU %s: %s (Driver: %s, Memory: %sMB)\n", $1, $2, $3, $4}'
+    fi
+}
+
+test_api() {
+    # Smoke-test the running service by querying /v1/models.
+    # Requires the daemon to be up (live PID) and curl to be installed.
+    echo "🧪 Testing GLM-OCR vLLM API..."
+
+    if [ ! -f "$PIDFILE" ] || ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
+        echo "❌ GLM-OCR vLLM service is not running"
+        return 1
+    fi
+
+    if ! command -v curl >/dev/null 2>&1; then
+        echo "❌ curl command not found"
+        return 1
+    fi
+
+    echo "📡 Testing /v1/models endpoint..."
+    # Test curl's exit status directly in the if; the former
+    # `response=$(...)` followed by `[ $? -eq 0 ]` pattern is fragile.
+    local response
+    if response=$(curl -s --connect-timeout 10 "http://127.0.0.1:$PORT/v1/models"); then
+        echo "✅ Models endpoint accessible"
+        # Pretty-print when python is available; fall back to raw JSON.
+        echo "$response" | python -m json.tool 2>/dev/null || echo "$response"
+    else
+        echo "❌ Models endpoint not accessible"
+    fi
+}
+
+test_client() {
+    # Print instructions for exercising the GLM-OCR SDK / Flask server
+    # against this vLLM backend. Only prints guidance — it does not run
+    # the client itself. Requires the service to be up and a sample image.
+    echo "🧪 Testing GLM-OCR SDK with vLLM server (self-hosted mode)..."
+
+    if [ ! -f $PIDFILE ] || ! kill -0 $(cat $PIDFILE) 2>/dev/null; then
+        echo "❌ GLM-OCR vLLM service is not running. Start it first: $0 start"
+        return 1
+    fi
+
+    # NOTE(review): hard-coded sample path — adjust per deployment.
+    TEST_IMAGE="/home/ubuntu/zhch/data/test/sample.png"
+    if [ ! -f "$TEST_IMAGE" ]; then
+        echo "⚠️  Test image not found: $TEST_IMAGE"
+        echo "Update TEST_IMAGE in this script or create the file."
+        return 1
+    fi
+
+    echo "📄 Test image: $TEST_IMAGE"
+    echo "Run GLM-OCR with config that has maas.enabled=false and ocr_api pointing to 127.0.0.1:$PORT"
+    echo "Example: glmocr parse $TEST_IMAGE --config /path/to/config.yaml"
+    echo ""
+    echo "Or start GLM-OCR Flask server (layout+OCR) that uses this vLLM backend:"
+    echo "  glmocr server --config /path/to/config-zhch.yaml  # with maas.enabled=false, ocr_api.api_port=$PORT"
+    echo "Then: curl -X POST http://localhost:5002/glmocr/parse -H 'Content-Type: application/json' -d '{\"images\": [\"file://$TEST_IMAGE\"]}'"
+}
+
+usage() {
+    # Print the command summary and current static configuration.
+    # A single here-doc replaces the long echo chain; $-expansions
+    # behave exactly as they did inside the double-quoted echos.
+    cat <<EOF
+GLM-OCR vLLM Service Daemon
+===========================
+Usage: $0 {start|stop|restart|status|logs|config|test|test-client}
+
+Commands:
+  start       - Start the GLM-OCR vLLM service
+  stop        - Stop the GLM-OCR vLLM service
+  restart     - Restart the GLM-OCR vLLM service
+  status      - Show service status and resource usage
+  logs        - Show service logs (follow mode)
+  config      - Show current configuration
+  test        - Test /v1/models API endpoint
+  test-client - Show how to test SDK/Flask with this vLLM backend
+
+Configuration (edit script to modify):
+  Host: $HOST
+  Port: $PORT
+  Model Path: $MODEL_PATH
+  Served Model Name: $SERVED_MODEL_NAME
+  GPU Memory: $GPU_MEMORY_UTILIZATION
+  CUDA Devices: $CUDA_VISIBLE_DEVICES
+  Enable MTP: $ENABLE_MTP
+
+Examples:
+  ./glmocr_vllm_daemon.sh start
+  ./glmocr_vllm_daemon.sh status
+  ./glmocr_vllm_daemon.sh logs
+  ./glmocr_vllm_daemon.sh test
+EOF
+}
+
+# Command dispatch: map the first CLI argument to a handler function.
+# Unknown or missing commands print usage and exit non-zero.
+case "$1" in
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    restart)
+        # Stop, give the port/GPU a moment to free up, then start again.
+        stop
+        sleep 3
+        start
+        ;;
+    status)
+        status
+        ;;
+    logs)
+        logs
+        ;;
+    config)
+        config
+        ;;
+    test)
+        # 'test' would shadow the shell builtin, hence the test_api name.
+        test_api
+        ;;
+    test-client)
+        test_client
+        ;;
+    *)
+        usage
+        exit 1
+        ;;
+esac

+ 16 - 1
ocr_tools/ocr_batch/processor_configs.yaml

@@ -8,7 +8,22 @@ processors:
   # MinerU vLLM 处理器
   # 基于 MinerU 的多线程批量处理(支持 PDF 和图片)
   # -------------------------------------------------------------------------
-  yusys_ocr:
+  yusys_ocr_v4:
+    script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
+    input_arg: "--input"
+    output_arg: "--output_dir"
+    extra_args:
+      - "--config=/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml"
+      - "--pages=1-35"
+      - "--streaming"
+      - "--debug"
+      - "--log_level=DEBUG"
+    output_subdir: "bank_statement_yusys_v4"
+    log_subdir: "logs/bank_statement_yusys_v4"
+    venv: "conda activate mineru2"
+    description: "YUSYS Wired UNET OCR 框架 GLM-OCR"
+
+  yusys_ocr_v3:
     script: "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/main_v2.py"
     input_arg: "--input"
     output_arg: "--output_dir"

+ 113 - 0
ocr_tools/universal_doc_parser/config/bank_statement_glm_vl.yaml

@@ -0,0 +1,113 @@
+# 银行交易流水场景配置 - GLM-OCR 版本
+scene_name: "bank_statement_glm"
+description: "银行交易流水、对账单等场景(使用 GLM-OCR 进行 VL 识别)"
+
+input:
+  supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"]
+  dpi: 200  # PDF转图片的DPI
+
+preprocessor:
+  module: "mineru"
+  orientation_classifier:
+    enabled: true
+    model_name: "paddle_orientation_classification"
+    model_dir: null  # 使用默认路径
+  unwarping:
+    enabled: false
+
+# ============================================================
+# Layout 检测配置 - 使用 PP-DocLayoutV3
+# ============================================================
+layout_detection:
+  module: "paddle"
+  model_name: "PP-DocLayoutV3"
+  model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
+  device: "cpu"
+  conf: 0.3
+  num_threads: 4
+  batch_size: 1
+  
+  # 后处理配置
+  post_process:
+    # 将大面积文本块转换为表格(后处理)
+    convert_large_text_to_table: true  # 是否启用
+    min_text_area_ratio: 0.25         # 最小面积占比(25%)
+    min_text_width_ratio: 0.4         # 最小宽度占比(40%)
+    min_text_height_ratio: 0.3        # 最小高度占比(30%)
+
+  # Debug 可视化配置
+  debug_options:
+    enabled: true               # 是否开启调试可视化输出
+    output_dir: null             # 调试输出目录;null不输出
+    prefix: ""                  # 保存文件名前缀(如设置为页码)
+
+# ============================================================
+# VL识别配置 - 使用 GLM-OCR
+# ============================================================
+vl_recognition:
+  module: "glmocr"
+  api_url: "http://10.192.72.11:20036/v1/chat/completions"
+  api_key: null  # 可选,如需要可填写
+  model: "glm-ocr"
+  max_image_size: 3500  # GLM-OCR 推荐的最大图片尺寸
+  resize_mode: 'max'    # 缩放模式: 'max' 保持宽高比, 'fixed' 固定尺寸
+  verify_ssl: false
+  
+  # Task prompt mapping - 针对不同任务使用不同提示词
+  task_prompt_mapping:
+    text: "Text Recognition:"
+    table: "Table Recognition:"
+    formula: "Formula Recognition:"
+    seal: "Seal Recognition:"  # 印章识别的专用提示词
+  
+  # 模型参数
+  model_params:
+    connection_pool_size: 128  # HTTP 连接池大小(应 >= max_workers)
+    http_timeout: 300          # HTTP 请求超时时间(秒)
+    connect_timeout: 30        # 连接超时时间(秒)
+    retry_max_attempts: 2      # 最大重试次数
+    retry_backoff_base_seconds: 0.5
+    retry_backoff_max_seconds: 8.0
+    retry_jitter_ratio: 0.2
+    retry_status_codes: [429, 500, 502, 503, 504]
+    max_tokens: 4096
+    temperature: 0.8
+    top_p: 0.9
+    top_k: 50
+    repetition_penalty: 1.1
+  
+  # 场景特定配置
+  table_recognition:
+    return_cells_coordinate: false  # GLM-OCR 不直接返回单元格坐标
+    bank_statement_mode: true
+
+# ============================================================
+# OCR识别配置
+# ============================================================
+ocr_recognition:
+  module: "mineru" 
+  language: "ch"
+  det_threshold: 0.6
+  unclip_ratio: 1.5
+  enable_merge_det_boxes: false
+  batch_size: 8
+  device: "cpu"
+
+# ============================================================
+# 输出配置
+# ============================================================
+output:
+  create_subdir: false
+  save_pdf_images: true
+  save_json: true
+  save_page_json: true
+  save_markdown: true
+  save_page_markdown: true
+  save_html: true
+  save_layout_image: true
+  save_ocr_image: true
+  draw_type_label: true
+  draw_bbox_number: true
+  save_enhanced_json: true
+  normalize_numbers: true
+  debug_mode: true

+ 178 - 0
ocr_tools/universal_doc_parser/config/bank_statement_yusys_v4.yaml

@@ -0,0 +1,178 @@
+# 银行交易流水场景配置 - V4版本
+# Pipeline V3逻辑: 有线表格使用MinerU UNet, 无线表格/seal使用GLM-OCR VLM
+scene_name: "bank_statement_yusys_v4"
+
+description: "银行流水V4: PP-DocLayoutV3 layout + PaddleOCR + MinerU UNet(有线表格)+ GLM-OCR VLM(无线表格/seal)"
+
+input:
+  supported_formats: [".pdf", ".png", ".jpg", ".jpeg", ".bmp", ".tiff"]
+  dpi: 200
+
+preprocessor:
+  module: "mineru"
+  orientation_classifier:
+    enabled: true
+    model_name: "paddle_orientation_classification"
+    model_dir: null  # 使用默认路径
+  unwarping:
+    enabled: false
+
+# ============================================================
+# Layout 检测配置 - 使用 PP-DocLayoutV3
+# ============================================================
+layout_detection:
+  module: "paddle"
+  model_name: "PP-DocLayoutV3"
+  model_dir: "PaddlePaddle/PP-DocLayoutV3_safetensors"
+  device: "cpu"
+  conf: 0.3
+  num_threads: 4
+  batch_size: 1
+  
+  # 后处理配置
+  post_process:
+    # 将大面积文本块转换为表格(后处理)
+    convert_large_text_to_table: true  # 是否启用
+    min_text_area_ratio: 0.25         # 最小面积占比(25%)
+    min_text_width_ratio: 0.4         # 最小宽度占比(40%)
+    min_text_height_ratio: 0.3        # 最小高度占比(30%)
+
+  # Debug 可视化配置
+  debug_options:
+    enabled: true               # 是否开启调试可视化输出
+    output_dir: null             # 调试输出目录;null不输出
+    prefix: ""                  # 保存文件名前缀(如设置为页码)
+
+# ============================================================
+# OCR 识别配置
+# ============================================================
+ocr_recognition:
+  module: "mineru"
+  language: "ch"
+  det_threshold: 0.5
+  unclip_ratio: 1.5
+  enable_merge_det_boxes: false
+  batch_size: 8
+  device: "cpu"
+
+# ============================================================
+# 表格分类配置(自动区分有线/无线表格)
+# ============================================================
+table_classification:
+  enabled: true               # 启用自动表格分类
+  module: "paddle"            # 分类模型:paddle(MinerU PaddleTableClsModel)
+  confidence_threshold: 0.5   # 分类置信度阈值
+  batch_size: 16              # 批处理大小
+
+  # Debug 可视化配置
+  debug_options:
+    enabled: true               # 是否开启调试可视化输出
+    output_dir: null             # 调试输出目录;null不输出
+    save_table_lines: true       # 保存表格线可视化(unet横线/竖线叠加)
+    image_format: "png"          # 可视化图片格式:png/jpg
+    prefix: ""                  # 保存文件名前缀(如设置为页码/表格序号)
+
+# ============================================================
+# 有线表格识别专用配置(MinerU UNet)
+# ============================================================
+table_recognition_wired:
+  use_wired_unet: true
+  upscale_ratio: 3.333
+  need_ocr: true
+  row_threshold: 10
+  col_threshold: 15
+  ocr_conf_threshold: 0.9       # 单元格 OCR 置信度阈值
+  cell_crop_margin: 2
+  use_custom_postprocess: true  # 是否使用自定义后处理(默认启用)
+
+  # 是否启用倾斜矫正
+  enable_deskew: true
+
+  # 🆕 启用多源单元格融合
+  use_cell_fusion: true
+  
+  # 融合引擎配置
+  cell_fusion:
+    # RT-DETR 模型路径(必需)
+    rtdetr_model_path: "/Users/zhch158/models/pytorch_models/Table/RT-DETR-L_wired_table_cell_det.onnx"
+    
+    # 融合权重
+    unet_weight: 0.6        # UNet 权重(结构性强)
+    rtdetr_weight: 0.4      # RT-DETR 权重(鲁棒性强)
+    
+    # 阈值配置
+    iou_merge_threshold: 0.7    # 高IoU合并阈值(>0.7则加权平均)
+    iou_nms_threshold: 0.5      # NMS去重阈值
+    rtdetr_conf_threshold: 0.5  # RT-DETR置信度阈值
+    
+    # 功能开关
+    enable_ocr_compensation: true      # 启用OCR边缘补偿
+
+  # Debug 可视化配置
+  debug_options:
+    enabled: true               # 是否开启调试可视化输出
+    output_dir: null             # 调试输出目录;null不输出
+    save_table_lines: true       # 保存表格线可视化(unet横线/竖线叠加)
+    save_connected_components: true  # 保存连通域提取的单元格图
+    save_grid_structure: true    # 保存逻辑网格结构(row/col/rowspan/colspan)
+    save_text_overlay: true      # 保存文本填充覆盖图
+    image_format: "png"          # 可视化图片格式:png/jpg
+    prefix: ""                  # 保存文件名前缀(如设置为页码/表格序号)
+
+# ============================================================
+# VL识别配置 - 使用 GLM-OCR(无线表格 + seal识别)
+# ============================================================
+vl_recognition:
+  module: "glmocr"
+  api_url: "http://10.192.72.11:20036/v1/chat/completions"
+  api_key: null  # 可选,如需要可填写
+  model: "glm-ocr"
+  max_image_size: 3500  # GLM-OCR 推荐的最大图片尺寸
+  resize_mode: 'max'    # 缩放模式: 'max' 保持宽高比, 'fixed' 固定尺寸
+  verify_ssl: false
+  
+  # Task prompt mapping - 针对不同任务使用不同提示词
+  task_prompt_mapping:
+    text: "Text Recognition:"
+    table: "Table Recognition:"
+    formula: "Formula Recognition:"
+    seal: "Seal Recognition:"  # 印章识别的专用提示词
+  
+  # 模型参数
+  model_params:
+    connection_pool_size: 128  # HTTP 连接池大小(应 >= max_workers)
+    http_timeout: 300          # HTTP 请求超时时间(秒)
+    connect_timeout: 30        # 连接超时时间(秒)
+    retry_max_attempts: 2      # 最大重试次数
+    retry_backoff_base_seconds: 0.5
+    retry_backoff_max_seconds: 8.0
+    retry_jitter_ratio: 0.2
+    retry_status_codes: [429, 500, 502, 503, 504]
+    max_tokens: 4096
+    temperature: 0.8
+    top_p: 0.9
+    top_k: 50
+    repetition_penalty: 1.1
+  
+  # 场景特定配置
+  table_recognition:
+    return_cells_coordinate: false  # GLM-OCR 不直接返回单元格坐标
+
+# ============================================================
+# 输出配置
+# ============================================================
+output:
+  create_subdir: false
+  save_pdf_images: true
+  save_json: true
+  save_page_json: true
+  save_markdown: true
+  save_page_markdown: true
+  save_html: true
+  save_layout_image: true
+  save_ocr_image: true
+  draw_type_label: true
+  draw_bbox_number: true
+  save_enhanced_json: true
+  normalize_numbers: true
+  debug_mode: true

+ 51 - 0
ocr_tools/universal_doc_parser/core/element_processors.py

@@ -655,6 +655,57 @@ class ElementProcessors:
             'content': content
         }
     
+    def process_seal_element(
+        self,
+        image: np.ndarray,
+        layout_item: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Process a seal (stamp) element using VLM recognition.
+        
+        Args:
+            image: Full page image the seal is cropped from.
+            layout_item: Layout detection item; 'bbox' and 'category'
+                are read, with defaults when absent.
+            
+        Returns:
+            Element dict with 'type', 'bbox', 'confidence' and 'content'
+            ({'text', 'confidence'}); text is empty when recognition fails.
+        """
+        bbox = layout_item.get('bbox', [0, 0, 0, 0])
+        category = layout_item.get('category', 'seal')
+        cropped_region = CoordinateUtils.crop_region(image, bbox)
+        
+        # Fallback content used when the recognizer is missing or raises.
+        content = {'text': '', 'confidence': 0.0}
+        
+        try:
+            # Lazily load the VL recognizer.
+            vl_recognizer = self._ensure_vl_recognizer()
+            if vl_recognizer is None:
+                logger.error("❌ VL recognizer not available for seal recognition")
+                # NOTE(review): this early return omits the 'confidence'
+                # key that the normal return below includes — confirm
+                # downstream consumers tolerate the missing key.
+                return {
+                    'type': category,
+                    'bbox': bbox,
+                    'content': content
+                }
+            
+            # Call recognize_text with element_type='seal'; the GLM-OCR
+            # adapter selects the seal-specific prompt from this type.
+            seal_result = vl_recognizer.recognize_text(cropped_region, element_type='seal')
+            content = {
+                'text': seal_result.get('text', ''),
+                'confidence': seal_result.get('confidence', 0.0)
+            }
+            
+            # Log the recognized text, truncated to 50 chars with ellipsis.
+            logger.info(f"🔖 Seal recognized: {content['text'][:50]}..." if len(content['text']) > 50 else f"🔖 Seal recognized: {content['text']}")
+        except Exception as e:
+            # Best-effort: recognition failure degrades to empty content.
+            logger.warning(f"Seal recognition failed: {e}")
+        
+        return {
+            'type': category,
+            'bbox': bbox,
+            'confidence': layout_item.get('confidence', 0.0),
+            'content': content
+        }
+    
     def process_image_element(
         self,
         image: np.ndarray,

+ 8 - 1
ocr_tools/universal_doc_parser/core/model_factory.py

@@ -39,9 +39,13 @@ class ModelFactory:
     def create_layout_detector(cls, config: Dict[str, Any]) -> BaseLayoutDetector:
         # 根据配置创建检测器
         module_name = config.get('module', 'mineru')
-        if module_name == 'paddle':
+        model_name = config.get('model_name', 'default')
+        if module_name == 'paddle' and model_name == 'RT-DETR-H_layout_17cls':
             from models.adapters import PaddleLayoutDetector
             detector = PaddleLayoutDetector(config)
+        elif module_name == 'paddle' and model_name == 'PP-DocLayoutV3':
+            from models.adapters import PPDocLayoutV3Detector
+            detector = PPDocLayoutV3Detector(config)
         elif module_name == 'docling':
             from models.adapters import DoclingLayoutDetector
             detector = DoclingLayoutDetector(config)
@@ -74,6 +78,9 @@ class ModelFactory:
         elif module_name == 'mineru':
             from models.adapters import MinerUVLRecognizer
             recognizer = MinerUVLRecognizer(config)
+        elif module_name == 'glmocr':
+            from models.adapters import GLMOCRVLRecognizer
+            recognizer = GLMOCRVLRecognizer(config)
         else:
             raise ValueError(f"Unknown VL recognizer module: {module_name}")
             

+ 18 - 0
ocr_tools/universal_doc_parser/core/pipeline_manager_v2.py

@@ -88,6 +88,9 @@ class EnhancedDocPipeline:
         'interline_equation_yolo', 'interline_equation_number'
     ]
     
+    # Seal(印章)类元素 - 需要 VLM 识别
+    SEAL_CATEGORIES = ['seal']
+    
     # 丢弃类元素(水印、装饰等)
     DISCARD_CATEGORIES = ['abandon', 'discarded']
     
@@ -750,6 +753,7 @@ class EnhancedDocPipeline:
             'image_body': [],
             'image_text': [],
             'equation': [],
+            'seal': [],  # 🔧 添加 seal 类别
             'code': [],
             'discard': []
         }
@@ -769,6 +773,8 @@ class EnhancedDocPipeline:
                 classified['image_text'].append(item)
             elif category in self.EQUATION_CATEGORIES:
                 classified['equation'].append(item)
+            elif category in self.SEAL_CATEGORIES:
+                classified['seal'].append(item)
             elif category in self.CODE_CATEGORIES:
                 classified['code'].append(item)
             elif category in self.DISCARD_CATEGORIES:
@@ -784,6 +790,7 @@ class EnhancedDocPipeline:
                    f"image={len(classified['image_body'])}, "
                    f"image_text={len(classified['image_text'])}, "
                    f"equation={len(classified['equation'])}, "
+                   f"seal={len(classified['seal'])}, "
                    f"code={len(classified['code'])}, "
                    f"discard={len(classified['discard'])}")
         
@@ -952,6 +959,17 @@ class EnhancedDocPipeline:
                 logger.warning(f"⚠️ Equation processing failed: {e}")
                 processed_elements.append(ElementProcessors.create_error_element(item, str(e)))
         
+        # 🔧 处理 Seal(印章)元素 - 使用 VLM 识别
+        for item in classified_elements['seal']:
+            try:
+                element = self.element_processors.process_seal_element(
+                    detection_image, item
+                )
+                processed_elements.append(element)
+            except Exception as e:
+                logger.warning(f"⚠️ Seal processing failed: {e}")
+                processed_elements.append(ElementProcessors.create_error_element(item, str(e)))
+        
         # 处理图片主体
         for item in classified_elements['image_body']:
             try:

+ 19 - 6
ocr_tools/universal_doc_parser/main_v2.py

@@ -414,6 +414,15 @@ if __name__ == "__main__":
         # 默认配置(用于开发测试)
         default_config = {
             # 测试输入
+            # "input": "/Users/zhch158/workspace/data/流水分析/湛_平安银行图.pdf",
+            # "output_dir": "./output/湛_平安银行图/bank_statement_yusys_v3",
+
+            # "input": "/Users/zhch158/workspace/data/流水分析/张_微信图.pdf",
+            # "output_dir": "./output/张_微信图/bank_statement_yusys_v3",
+
+            # "input": "/Users/zhch158/workspace/data/流水分析/许_民生银行图.pdf",
+            # "output_dir": "./output/许_民生银行图/bank_statement_yusys_v3",
+
             # "input": "/Users/zhch158/workspace/data/流水分析/康强_北京农村商业银行.pdf",
             # "output_dir": "./output/康强_北京农村商业银行/bank_statement_mineru_vl",
 
@@ -425,9 +434,12 @@ if __name__ == "__main__":
 
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_005.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003.png",
             # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/2023年度报告母公司_page_003_270_skew(-0.4).png",
-            # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
+            "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
+            # "output_dir": "./output/2023年度报告母公司/bank_statement_yusys_v3",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v3",
+            "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_glm_vl",
 
             # "input": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/2023年度报告母公司/bank_statement_yusys_v2",
@@ -444,8 +456,8 @@ if __name__ == "__main__":
             # "input": "/Users/zhch158/workspace/data/流水分析/德_内蒙古银行照.pdf",
             # "output_dir": "./output/德_内蒙古银行照/bank_statement_yusys_v3",
 
-            "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
-            "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
+            # "input": "/Users/zhch158/workspace/repository.git/ocr_platform/ocr_tools/universal_doc_parser/tests/提取自赤峰黄金2023年报.pdf",
+            # "output_dir": "./output/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
             # "input": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报.pdf",
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/提取自赤峰黄金2023年报/bank_statement_yusys_v3",
 
@@ -463,7 +475,8 @@ if __name__ == "__main__":
             # "output_dir": "/Users/zhch158/workspace/data/流水分析/山西云集科技有限公司/bank_statement_yusys_v3",
 
             # 配置文件
-            "config": "./config/bank_statement_yusys_v3.yaml",
+            "config": "./config/bank_statement_glm_vl.yaml",
+            # "config": "./config/bank_statement_yusys_v3.yaml",
             # "config": "./config/bank_statement_smart_router.yaml",
             # "config": "./config/bank_statement_mineru_vl.yaml",
             # "config": "./config/bank_statement_yusys_v2.yaml",
@@ -473,7 +486,7 @@ if __name__ == "__main__":
             "scene": "bank_statement",
             
             # 页面范围(可选)
-            "pages": "7",  # 只处理前1页
+            "pages": "3-7",  # 处理第 3-7 页
             # "pages": "1-3,5,7-10",  # 处理指定页面
             # "pages": "83-109",  # 处理指定页面
 
@@ -486,7 +499,7 @@ if __name__ == "__main__":
             "log_level": "DEBUG",
 
             # 日志文件
-            "log_file": "./output/logs/bank_statement_yusys_v3/process.log",
+            "log_file": "./output/logs/bank_statement_glm_vl/process.log",
         }
         
         # 构造参数

+ 10 - 0
ocr_tools/universal_doc_parser/models/adapters/__init__.py

@@ -17,6 +17,13 @@ from .paddle_vl_adapter import PaddleVLRecognizer
 from .docling_layout_adapter import DoclingLayoutDetector
 from .pp_doclayout_v3_layout_adapter import PPDocLayoutV3Detector
 
+# GLM-OCR 适配器
+try:
+    from .glmocr_vl_adapter import GLMOCRVLRecognizer
+    GLMOCR_AVAILABLE = True
+except ImportError:
+    GLMOCR_AVAILABLE = False
+
 # 可选导入 DiT 适配器
 try:
     from .dit_layout_adapter import DitLayoutDetector
@@ -49,6 +56,9 @@ __all__ = [
     'PaddleLayoutDetector',
     'PaddleVLRecognizer',
     
+    # GLM-OCR 适配器
+    'GLMOCRVLRecognizer',
+    
     # Docling 适配器
     'DoclingLayoutDetector',
     # PP-DocLayoutV3 适配器

+ 413 - 0
ocr_tools/universal_doc_parser/models/adapters/glmocr_vl_adapter.py

@@ -0,0 +1,413 @@
+"""GLM-OCR VL识别适配器
+
+直接通过 HTTP 调用 GLM-OCR API(OpenAI 兼容格式)。
+支持表格、公式、文本和印章(seal)识别。
+
+架构说明:
+- 使用 requests 库直接调用 GLM-OCR HTTP API
+- 无需依赖 glmocr 包
+- 通过 task_prompt_mapping 配置不同任务的提示词
+- 支持图片预处理(尺寸控制)
+"""
+
+import sys
+from pathlib import Path
+from typing import Dict, Any, List, Union, Optional
+import numpy as np
+from PIL import Image
+from loguru import logger
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+import base64
+from io import BytesIO
+import json
+
+# 导入基类
+from .base import BaseVLRecognizer
+
+
+class GLMOCRVLRecognizer(BaseVLRecognizer):
+    """
+    GLM-OCR VL识别适配器
+    
+    配置示例:
+    ```yaml
+    vl_recognition:
+      module: "glmocr"
+      api_url: "http://10.192.72.11:20036/v1/chat/completions"
+      api_key: null  # 可选
+      model: "glm-ocr"
+      max_image_size: 3500
+      resize_mode: 'max'
+      task_prompt_mapping:
+        text: "Text Recognition:"
+        table: "Table Recognition:"
+        formula: "Formula Recognition:"
+        seal: "Seal Recognition:"
+      model_params:
+        connection_pool_size: 128
+        http_timeout: 300
+        retry_max_attempts: 2
+    ```
+    """
+    
+    def __init__(self, config: Dict[str, Any]):
+        super().__init__(config)
+        
+        self.session = None
+        
+        # API 配置
+        self.api_url = config.get('api_url', 'http://127.0.0.1:8000/v1/chat/completions')
+        self.api_key = config.get('api_key')
+        self.model = config.get('model', 'glm-ocr')
+        self.verify_ssl = config.get('verify_ssl', False)
+        
+        # 图片尺寸限制配置
+        self.max_image_size = config.get('max_image_size', 3500)
+        self.resize_mode = config.get('resize_mode', 'max')
+        
+        # Task prompt mapping(任务提示词映射)
+        self.task_prompt_mapping = config.get('task_prompt_mapping', {
+            'text': 'Text Recognition:',
+            'table': 'Table Recognition:',
+            'formula': 'Formula Recognition:',
+            'seal': 'Seal Recognition:',
+        })
+        
+        # 模型参数
+        model_params = config.get('model_params', {})
+        self.connection_pool_size = model_params.get('connection_pool_size', 128)
+        self.http_timeout = model_params.get('http_timeout', 300)
+        self.connect_timeout = model_params.get('connect_timeout', 30)
+        self.retry_max_attempts = model_params.get('retry_max_attempts', 2)
+        
+        # 生成参数
+        self.max_tokens = model_params.get('max_tokens', 4096)
+        self.temperature = model_params.get('temperature', 0.8)
+        self.top_p = model_params.get('top_p', 0.9)
+        self.top_k = model_params.get('top_k', 50)
+        self.repetition_penalty = model_params.get('repetition_penalty', 1.1)
+        
+        logger.info(f"GLM-OCR VL Recognizer configured with max_image_size={self.max_image_size}")
+        logger.debug(f"Task prompt mapping: {self.task_prompt_mapping}")
+    
+    def initialize(self):
+        """初始化 HTTP 会话"""
+        try:
+            # 创建会话
+            self.session = requests.Session()
+            
+            # 配置连接池
+            adapter = HTTPAdapter(
+                pool_connections=self.connection_pool_size,
+                pool_maxsize=self.connection_pool_size,
+                max_retries=Retry(
+                    total=self.retry_max_attempts,
+                    backoff_factor=0.5,
+                    status_forcelist=[429, 500, 502, 503, 504],
+                )
+            )
+            self.session.mount('http://', adapter)
+            self.session.mount('https://', adapter)
+            
+            # 设置默认 headers
+            self.session.headers.update({
+                'Content-Type': 'application/json',
+            })
+            
+            if self.api_key:
+                self.session.headers.update({
+                    'Authorization': f'Bearer {self.api_key}'
+                })
+            
+            logger.success(f"✅ GLM-OCR VL recognizer initialized: {self.api_url}")
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to initialize GLM-OCR VL recognizer: {e}")
+            raise
+    
+    def cleanup(self):
+        """清理资源"""
+        if self.session:
+            self.session.close()
+            self.session = None
+        logger.debug("GLM-OCR VL recognizer cleaned up")
+    
+    def _preprocess_image(self, image: Union[np.ndarray, Image.Image]) -> Image.Image:
+        """
+        预处理图片,控制尺寸避免序列长度超限
+        
+        Args:
+            image: 输入图片
+            
+        Returns:
+            处理后的PIL图片
+        """
+        # 转换为PIL图像
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+        
+        # 获取原始尺寸
+        orig_w, orig_h = image.size
+        
+        # 计算缩放比例
+        if self.resize_mode == 'max':
+            # 保持宽高比,最长边不超过 max_image_size
+            max_dim = max(orig_w, orig_h)
+            if max_dim > self.max_image_size:
+                scale = self.max_image_size / max_dim
+                new_w = int(orig_w * scale)
+                new_h = int(orig_h * scale)
+                
+                logger.debug(f"🔄 Resizing image: {orig_w}x{orig_h} → {new_w}x{new_h} (scale={scale:.3f})")
+                image = image.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        
+        elif self.resize_mode == 'fixed':
+            # 固定尺寸(可能改变宽高比)
+            if orig_w != self.max_image_size or orig_h != self.max_image_size:
+                logger.debug(f"🔄 Resizing image: {orig_w}x{orig_h} → {self.max_image_size}x{self.max_image_size}")
+                image = image.resize((self.max_image_size, self.max_image_size), Image.Resampling.LANCZOS)
+        
+        return image
+    
+    def _build_request_for_image(
+        self, 
+        image: Image.Image, 
+        task_type: str = 'text'
+    ) -> Dict[str, Any]:
+        """
+        为单张图片构建 GLM-OCR API 请求
+        
+        Args:
+            image: PIL图片
+            task_type: 任务类型 ('text', 'table', 'formula', 'seal')
+            
+        Returns:
+            请求字典
+        """
+        # 获取任务对应的提示词
+        prompt_text = self.task_prompt_mapping.get(task_type, self.task_prompt_mapping.get('text', ''))
+        
+        # 将图片转为 base64
+        buffered = BytesIO()
+        image.save(buffered, format="JPEG")
+        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        img_url = f"data:image/jpeg;base64,{img_base64}"
+        
+        # 构建请求(OpenAI 兼容格式)
+        request_data = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": img_url}},
+                        {"type": "text", "text": prompt_text},
+                    ]
+                }
+            ],
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "top_k": self.top_k,
+            "repetition_penalty": self.repetition_penalty,
+        }
+        
+        return request_data
+    
+    def _call_ocr_api(self, image: Image.Image, task_type: str) -> str:
+        """
+        调用 GLM-OCR API 进行识别
+        
+        Args:
+            image: PIL图片
+            task_type: 任务类型
+            
+        Returns:
+            识别结果文本
+        """
+        if self.session is None:
+            raise RuntimeError("HTTP session not initialized")
+        
+        try:
+            # 构建请求
+            request_data = self._build_request_for_image(image, task_type)
+            
+            # 调用 API
+            response = self.session.post(
+                self.api_url,
+                json=request_data,
+                timeout=(self.connect_timeout, self.http_timeout),
+                verify=self.verify_ssl
+            )
+            
+            if response.status_code != 200:
+                logger.error(f"OCR API returned status {response.status_code}: {response.text}")
+                return ""
+            
+            # 解析响应
+            result = response.json()
+            
+            # 提取识别结果
+            if 'choices' in result and len(result['choices']) > 0:
+                content = result['choices'][0].get('message', {}).get('content', '')
+                return content
+            
+            logger.warning(f"No content in OCR response: {result}")
+            return ""
+            
+        except requests.exceptions.Timeout:
+            logger.error(f"OCR API timeout after {self.http_timeout}s")
+            return ""
+        except requests.exceptions.RequestException as e:
+            logger.error(f"OCR API request failed: {e}")
+            return ""
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse OCR response: {e}")
+            return ""
+        except Exception as e:
+            logger.error(f"OCR API call failed: {e}")
+            return ""
+    
+    def recognize_table(self, image: Union[np.ndarray, Image.Image], **kwargs) -> Dict[str, Any]:
+        """
+        识别表格
+        
+        Args:
+            image: 输入图片
+            **kwargs: 额外参数(未使用)
+            
+        Returns:
+            包含 'html' 和 'markdown' 的字典
+        """
+        try:
+            # 预处理图片
+            image = self._preprocess_image(image)
+            
+            # 调用 API
+            result_text = self._call_ocr_api(image, 'table')
+            
+            if not result_text:
+                return {'html': '', 'markdown': '', 'cells': []}
+            
+            # GLM-OCR 默认返回 Markdown 格式
+            # 如果需要 HTML,可以使用简单的转换(或保持 Markdown)
+            return {
+                'html': result_text,  # GLM-OCR 可能返回 HTML 或 Markdown
+                'markdown': result_text,
+                'cells': [],  # GLM-OCR 不直接返回单元格坐标
+            }
+            
+        except Exception as e:
+            logger.error(f"❌ Table recognition failed: {e}")
+            return {'html': '', 'markdown': '', 'cells': []}
+    
+    def recognize_formula(self, image: Union[np.ndarray, Image.Image], **kwargs) -> Dict[str, Any]:
+        """
+        识别公式
+        
+        Args:
+            image: 输入图片
+            **kwargs: 额外参数(未使用)
+            
+        Returns:
+            包含 'latex' 的字典
+        """
+        try:
+            # 预处理图片
+            image = self._preprocess_image(image)
+            
+            # 调用 API
+            result_text = self._call_ocr_api(image, 'formula')
+            
+            if not result_text:
+                return {'latex': '', 'confidence': 0.0, 'raw': {}}
+            
+            # 清理 LaTeX 格式(移除 markdown 代码块标记)
+            latex = self._clean_latex(result_text)
+            
+            return {
+                'latex': latex,
+                'confidence': 0.9 if latex else 0.0,
+                'raw': {'raw_output': result_text}
+            }
+            
+        except Exception as e:
+            logger.error(f"❌ Formula recognition failed: {e}")
+            return {'latex': '', 'confidence': 0.0, 'raw': {}}
+    
+    def recognize_text(self, image: Union[np.ndarray, Image.Image], **kwargs) -> Dict[str, Any]:
+        """
+        识别文本区域(包括普通文本和印章)
+        
+        Args:
+            image: 输入图片
+            **kwargs: 额外参数,可包含 'element_type' 指定类型(如 'seal')
+            
+        Returns:
+            包含 'text' 的字典
+        """
+        try:
+            # 预处理图片
+            image = self._preprocess_image(image)
+            
+            # 确定任务类型(如果是 seal,使用 seal 提示词)
+            element_type = kwargs.get('element_type', 'text')
+            task_type = 'seal' if element_type == 'seal' else 'text'
+            
+            # 调用 API
+            result_text = self._call_ocr_api(image, task_type)
+            
+            return {
+                'text': result_text or '',
+                'confidence': 0.9 if result_text else 0.0
+            }
+            
+        except Exception as e:
+            logger.error(f"❌ Text recognition failed: {e}")
+            return {'text': '', 'confidence': 0.0}
+    
+    def _clean_latex(self, latex_str: str) -> str:
+        """
+        清理 LaTeX 字符串,移除 Markdown 代码块标记
+        
+        Args:
+            latex_str: 原始 LaTeX 字符串
+            
+        Returns:
+            清理后的 LaTeX
+        """
+        if not latex_str:
+            return ""
+        
+        # 移除 Markdown 代码块标记
+        latex_str = latex_str.strip()
+        if latex_str.startswith('```'):
+            lines = latex_str.split('\n')
+            # 移除第一行的 ```latex 或 ```
+            if lines[0].startswith('```'):
+                lines = lines[1:]
+            # 移除最后一行的 ```
+            if lines and lines[-1].strip() == '```':
+                lines = lines[:-1]
+            latex_str = '\n'.join(lines)
+        
+        # 移除行内代码标记
+        if latex_str.startswith('`') and latex_str.endswith('`'):
+            latex_str = latex_str[1:-1]
+        
+        # 移除常见的 LaTeX 包裹符号
+        latex_str = latex_str.strip()
+        if latex_str.startswith('$') and latex_str.endswith('$'):
+            # 移除单个 $ 或 $$
+            if latex_str.startswith('$$') and latex_str.endswith('$$'):
+                latex_str = latex_str[2:-2]
+            else:
+                latex_str = latex_str[1:-1]
+        
+        return latex_str.strip()
+
+
+# 导出适配器类
+__all__ = ['GLMOCRVLRecognizer']

+ 1 - 1
ocr_tools/universal_doc_parser/models/adapters/pp_doclayout_v3_layout_adapter.py

@@ -77,7 +77,7 @@ class PPDocLayoutV3Detector(BaseLayoutDetector):
         "paragraph_title": "title",
         "reference": "text",
         "reference_content": "text",
-        "seal": "image_body",
+        "seal": "seal",  # 🔧 修改:保留 seal 作为独立类别,用于 VLM 识别
         "table": "table_body",
         "text": "text",
         "vision_footnote": "page_footnote",

+ 188 - 0
ocr_tools/universal_doc_parser/tests/test_glmocr_adapter.py

@@ -0,0 +1,188 @@
+#!/opt/miniconda3/envs/mineru2/bin/python
+"""测试 GLM-OCR 适配器加载
+
+验证:
+1. 适配器类可以正确导入
+2. 配置文件可以正确解析
+3. 适配器可以正确初始化
+"""
+
+import sys
+from pathlib import Path
+
+# 添加项目根目录到路径
+project_root = Path(__file__).parents[1]
+sys.path.insert(0, str(project_root))
+
+from loguru import logger
+
+def test_import_adapter():
+    """测试导入适配器"""
+    logger.info("测试 1: 导入 GLM-OCR 适配器...")
+    try:
+        from models.adapters import GLMOCRVLRecognizer
+        logger.success("✅ GLMOCRVLRecognizer 导入成功")
+        return True
+    except Exception as e:
+        logger.error(f"❌ 导入失败: {e}")
+        return False
+
+
+def test_load_config():
+    """测试加载配置文件"""
+    logger.info("测试 2: 加载配置文件...")
+    try:
+        import yaml
+        # 修正配置文件路径
+        config_path = project_root / "config" / "bank_statement_glm_vl.yaml"
+        if not config_path.exists():
+            # 尝试其他可能的路径
+            config_path = Path(__file__).parent / "config" / "bank_statement_glm_vl.yaml"
+        
+        if not config_path.exists():
+            logger.warning(f"⚠️  配置文件不存在: {config_path}")
+            logger.warning("跳过配置文件测试")
+            return True, None  # 不算失败
+        
+        with open(config_path, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+        
+        logger.info(f"配置场景: {config.get('scene_name')}")
+        logger.info(f"VL模块: {config.get('vl_recognition', {}).get('module')}")
+        logger.info(f"Layout模块: {config.get('layout_detection', {}).get('module')}")
+        
+        logger.success("✅ 配置文件加载成功")
+        return True, config
+    except Exception as e:
+        logger.error(f"❌ 配置加载失败: {e}")
+        return False, None
+
+
+def test_create_adapter():
+    """测试创建适配器实例"""
+    logger.info("测试 3: 创建适配器实例...")
+    try:
+        from models.adapters import GLMOCRVLRecognizer
+        
+        # 简化的配置
+        config = {
+            'module': 'glmocr',
+            'api_url': 'http://10.192.72.11:20036/v1/chat/completions',
+            'model': 'glm-ocr',
+            'max_image_size': 3500,
+            'resize_mode': 'max',
+            'task_prompt_mapping': {
+                'text': 'Text Recognition:',
+                'table': 'Table Recognition:',
+                'formula': 'Formula Recognition:',
+                'seal': 'Seal Recognition:',
+            },
+            'model_params': {
+                'connection_pool_size': 128,
+                'http_timeout': 300,
+                'retry_max_attempts': 2,
+            }
+        }
+        
+        recognizer = GLMOCRVLRecognizer(config)
+        logger.info(f"适配器类型: {type(recognizer)}")
+        logger.info(f"最大图片尺寸: {recognizer.max_image_size}")
+        logger.info(f"任务提示词: {recognizer.task_prompt_mapping}")
+        
+        logger.success("✅ 适配器实例创建成功")
+        return True, recognizer
+    except Exception as e:
+        logger.error(f"❌ 适配器创建失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return False, None
+
+
+def test_initialize_adapter(recognizer):
+    """测试初始化适配器(需要 API 服务可用)"""
+    logger.info("测试 4: 初始化适配器(需要 GLM-OCR API 服务)...")
+    try:
+        recognizer.initialize()
+        logger.success("✅ 适配器初始化成功")
+        logger.info(f"HTTP Session: {recognizer.session}")
+        return True
+    except Exception as e:
+        logger.warning(f"⚠️  适配器初始化失败(可能是 API 服务不可用): {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_model_factory():
+    """测试通过工厂创建适配器"""
+    logger.info("测试 5: 通过 ModelFactory 创建适配器...")
+    try:
+        from core.model_factory import ModelFactory
+        
+        config = {
+            'module': 'glmocr',
+            'api_url': 'http://10.192.72.11:20036/v1/chat/completions',
+            'model': 'glm-ocr',
+            'max_image_size': 3500,
+            'model_params': {
+                'connection_pool_size': 128,
+                'http_timeout': 300,
+            }
+        }
+        
+        recognizer = ModelFactory.create_vl_recognizer(config)
+        logger.info(f"适配器类型: {type(recognizer).__name__}")
+        logger.success("✅ ModelFactory 创建适配器成功")
+        return True
+    except Exception as e:
+        logger.error(f"❌ ModelFactory 创建失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def main():
+    """主测试函数"""
+    logger.info("="*60)
+    logger.info("开始测试 GLM-OCR 适配器")
+    logger.info("="*60)
+    
+    results = []
+    
+    # 测试 1: 导入
+    results.append(("导入适配器", test_import_adapter()))
+    
+    # 测试 2: 加载配置
+    success, config = test_load_config()
+    results.append(("加载配置", success))
+    
+    # 测试 3: 创建实例
+    success, recognizer = test_create_adapter()
+    results.append(("创建实例", success))
+    
+    # 测试 4: 初始化(可选,需要 API 服务)
+    if success and recognizer:
+        init_success = test_initialize_adapter(recognizer)
+        results.append(("初始化适配器", init_success))
+    
+    # 测试 5: 工厂方法
+    results.append(("ModelFactory", test_model_factory()))
+    
+    # 汇总结果
+    logger.info("="*60)
+    logger.info("测试结果汇总:")
+    logger.info("="*60)
+    for test_name, result in results:
+        status = "✅ 通过" if result else "❌ 失败"
+        logger.info(f"{test_name:20s}: {status}")
+    
+    passed = sum(1 for _, r in results if r)
+    total = len(results)
+    logger.info(f"\n总计: {passed}/{total} 测试通过")
+    
+    return passed == total
+
+
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)

+ 135 - 0
ocr_tools/universal_doc_parser/tests/test_glmocr_api.py

@@ -0,0 +1,135 @@
+#!/opt/miniconda3/envs/mineru2/bin/python
+"""测试 GLM-OCR API 实际调用
+
+验证:
+1. 文本识别
+2. API 请求和响应
+"""
+
+import sys
+from pathlib import Path
+import numpy as np
+from PIL import Image
+
+# 添加项目根目录到路径
+project_root = Path(__file__).parents[1]
+sys.path.insert(0, str(project_root))
+
+from loguru import logger
+from models.adapters import GLMOCRVLRecognizer
+
+
+def test_text_recognition():
+    """测试文本识别(使用简单测试图片)"""
+    logger.info("测试: 文本识别...")
+    
+    try:
+        # 创建配置
+        config = {
+            'module': 'glmocr',
+            'api_url': 'http://10.192.72.11:20036/v1/chat/completions',
+            'model': 'glm-ocr',
+            'max_image_size': 3500,
+            'resize_mode': 'max',
+            'task_prompt_mapping': {
+                'text': 'Text Recognition:',
+            },
+            'model_params': {
+                'connection_pool_size': 128,
+                'http_timeout': 300,
+            }
+        }
+        
+        # 创建识别器
+        recognizer = GLMOCRVLRecognizer(config)
+        recognizer.initialize()
+        
+        # 创建简单测试图片(白底黑字)
+        img = Image.new('RGB', (200, 100), color='white')
+        # 注意:这只是一个占位图片,实际识别需要有文字的图片
+        
+        logger.info("调用 recognize_text()...")
+        result = recognizer.recognize_text(img)
+        
+        logger.info(f"识别结果: {result}")
+        logger.info(f"文本内容: {result.get('text', '')[:100]}")
+        logger.info(f"置信度: {result.get('confidence', 0.0)}")
+        
+        recognizer.cleanup()
+        
+        if result.get('text') is not None:
+            logger.success("✅ 文本识别测试通过")
+            return True
+        else:
+            logger.warning("⚠️  未获取到识别结果(可能是测试图片为空)")
+            return True  # 不算失败
+            
+    except Exception as e:
+        logger.error(f"❌ 文本识别测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_api_connection():
+    """测试 API 连接"""
+    logger.info("测试: API 连接...")
+    
+    try:
+        import requests
+        
+        api_url = 'http://10.192.72.11:20036/v1/chat/completions'
+        
+        # 发送简单健康检查请求
+        response = requests.get(
+            api_url.replace('/v1/chat/completions', '/health'),
+            timeout=10
+        )
+        
+        if response.status_code == 200:
+            logger.success("✅ API 服务可访问")
+            return True
+        else:
+            logger.warning(f"⚠️  API 返回状态码: {response.status_code}")
+            return True  # 不算失败,可能不支持 /health 端点
+            
+    except requests.exceptions.ConnectionError:
+        logger.warning("⚠️  无法连接到 API 服务(可能未启动)")
+        return True  # 不算失败
+    except Exception as e:
+        logger.warning(f"⚠️  API 连接测试异常: {e}")
+        return True  # 不算失败
+
+
+def main():
+    """主测试函数"""
+    logger.info("="*60)
+    logger.info("GLM-OCR API 实际调用测试")
+    logger.info("="*60)
+    
+    results = []
+    
+    # 测试 API 连接
+    results.append(("API连接", test_api_connection()))
+    
+    # 测试文本识别
+    results.append(("文本识别", test_text_recognition()))
+    
+    # 汇总结果
+    logger.info("="*60)
+    logger.info("测试结果汇总:")
+    logger.info("="*60)
+    for test_name, result in results:
+        status = "✅ 通过" if result else "❌ 失败"
+        logger.info(f"{test_name:20s}: {status}")
+    
+    passed = sum(1 for _, r in results if r)
+    total = len(results)
+    logger.info(f"\n总计: {passed}/{total} 测试通过")
+    
+    return passed == total
+
+
+if __name__ == "__main__":
+    success = main()
+    sys.exit(0 if success else 1)

+ 20 - 0
ocr_utils/json_formatters.py

@@ -180,6 +180,20 @@ class JSONFormatters:
                     }]
                 }]
         
+        # 印章类型
+        elif elem_type == 'seal':
+            text = content.get('text', '') if isinstance(content, dict) else str(content)
+            confidence = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+            block['lines'] = [{
+                'bbox': bbox,
+                'spans': [{
+                    'bbox': bbox,
+                    'type': 'seal',
+                    'content': text,
+                    'confidence': confidence
+                }]
+            }]
+        
         # 丢弃类型
         elif elem_type in ['abandon', 'discarded']:
             block['type'] = 'abandon'
@@ -361,6 +375,12 @@ class JSONFormatters:
             result['type'] = elem_type
             result['text'] = content.get('text', '') if isinstance(content, dict) else str(content)
         
+        # 印章类型
+        elif elem_type == 'seal':
+            result['type'] = 'seal'
+            result['text'] = content.get('text', '') if isinstance(content, dict) else str(content)
+            result['confidence'] = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+        
         # 丢弃元素
         elif elem_type in ['discarded', 'abandon']:
             result['type'] = 'discarded'

+ 13 - 0
ocr_utils/markdown_generator.py

@@ -276,6 +276,12 @@ pages: {len(results.get('pages', []))}
                         else:
                             md_lines.append(f"*{text}*")
                         md_lines.append("")
+                
+                elif elem_type == 'seal':
+                    text = content.get('text', '') if isinstance(content, dict) else str(content)
+                    if text:
+                        md_lines.append(f"🔖 **[印章]** {text}")
+                        md_lines.append("")
         
         return '\n'.join(md_lines)
     
@@ -371,6 +377,13 @@ pages: {len(results.get('pages', []))}
                         md_lines.append(f"*{text}*")
                     md_lines.append("")
             
+            elif elem_type == 'seal':
+                text = content.get('text', '') if isinstance(content, dict) else str(content)
+                if text:
+                    confidence = content.get('confidence', 0.0) if isinstance(content, dict) else 0.0
+                    md_lines.append(f"🔖 **[印章]** {text} _(置信度: {confidence:.2f})_")
+                    md_lines.append("")
+            
             elif elem_type == 'discarded':
                 text = content.get('text', '') if isinstance(content, dict) else ''
                 if text:

+ 7 - 0
ocr_validator/config/A用户_单元格扫描流水.yaml

@@ -4,6 +4,13 @@ document:
 
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/B用户_扫描流水.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 0 - 2
ocr_validator/config/global.yaml

@@ -160,5 +160,3 @@ data_sources:
   - 张_微信图.yaml
   - 付_工商银行943825图.yaml
   - 许_民生银行图.yaml
-
-

+ 7 - 0
ocr_validator/config/乔_建设银行图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/付_工商银行943825图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/对公_招商银行图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/山西云集科技有限公司.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/康强_北京农村商业银行.yaml

@@ -5,6 +5,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/张_微信图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/德_内蒙古银行照.yaml

@@ -5,6 +5,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/提取自赤峰黄金2023年报.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/施博深.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/朱_中信银行图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 7 - 0
ocr_validator/config/湛_平安银行图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"

+ 9 - 2
ocr_validator/config/至远彩色_2023年报.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"
@@ -13,8 +20,8 @@ document:
 
     # bank_statement_yusys_v2
     - tool: "mineru"
-      result_dir: "bank_statement_yusys_v2"
-      image_dir: "mineru_vllm_results/{{name}}"
+      result_dir: "bank_statement_glm_vl"
+      image_dir: "bank_statement_glm_vl/{{name}}"
       description: "YUSYS统一OCR框架"
       enabled: true
 

+ 7 - 0
ocr_validator/config/许_民生银行图.yaml

@@ -4,6 +4,13 @@ document:
   
   # 🎯 关键改进:定义该文档使用的 OCR 工具及其结果目录
   ocr_results:
+    # bank_statement_yusys_v4
+    - tool: "mineru"
+      result_dir: "bank_statement_yusys_v4"
+      image_dir: "bank_statement_yusys_v4/{{name}}"
+      description: "YUSYS-OCR框架 v4.0 GLM-OCR"
+      enabled: true
+
     # bank_statement_yusys_v3
     - tool: "mineru"
       result_dir: "bank_statement_yusys_v3"