Просмотр исходного кода

feat: add editor and template services for managing table structures and templates

- Implemented EditorService to handle image and JSON uploads, process OCR data, and save results.
- Added TemplateService for creating, listing, retrieving, and deleting table line templates.
- Introduced multiple template files for different users and scenarios, including relative and absolute coordinates for line placements.
- Enhanced error handling and logging for better traceability during processing.
zhch158_admin 1 неделя назад
Родитель
Commit
0ac3fe879a
26 измененных файлов с 3075 добавлено и 0 удалено
  1. 7 0
      table_line_generator/backend/api/__init__.py
  2. 172 0
      table_line_generator/backend/api/batch.py
  3. 143 0
      table_line_generator/backend/api/config.py
  4. 212 0
      table_line_generator/backend/api/directory.py
  5. 207 0
      table_line_generator/backend/api/editor.py
  6. 157 0
      table_line_generator/backend/api/template.py
  7. 21 0
      table_line_generator/backend/config/data_sources/B用户_扫描流水.yaml
  8. 42 0
      table_line_generator/backend/config/data_sources/README.md
  9. 21 0
      table_line_generator/backend/config/data_sources/康强_北京农村商业银行_mineru.yaml
  10. 21 0
      table_line_generator/backend/config/data_sources/康强_北京农村商业银行_ppstructure.yaml
  11. 21 0
      table_line_generator/backend/config/data_sources/康强_北京农村商业银行_yusys_ocr.yaml
  12. 21 0
      table_line_generator/backend/config/data_sources/施博深.yaml
  13. 92 0
      table_line_generator/backend/main.py
  14. 27 0
      table_line_generator/backend/models/__init__.py
  15. 188 0
      table_line_generator/backend/models/schemas.py
  16. 19 0
      table_line_generator/backend/requirements.txt
  17. 204 0
      table_line_generator/backend/services/BATCH_README.md
  18. 7 0
      table_line_generator/backend/services/__init__.py
  19. 400 0
      table_line_generator/backend/services/batch_service.py
  20. 271 0
      table_line_generator/backend/services/editor_service.py
  21. 313 0
      table_line_generator/backend/services/template_service.py
  22. 135 0
      table_line_generator/backend/templates/B用户_扫描流水.template.json
  23. 143 0
      table_line_generator/backend/templates/B用户_扫描流水_v2.template.json
  24. 85 0
      table_line_generator/backend/templates/康强_北京农村商业银行.template.json
  25. 71 0
      table_line_generator/backend/templates/施博深_YUSYS统一OCR框架-v2.template.json
  26. 75 0
      table_line_generator/backend/templates/施博深_page_001.template.json

+ 7 - 0
table_line_generator/backend/api/__init__.py

@@ -0,0 +1,7 @@
+"""
+Backend API package
+"""
+
+from .editor import router as editor_router
+
+__all__ = ['editor_router']

+ 172 - 0
table_line_generator/backend/api/batch.py

@@ -0,0 +1,172 @@
+"""
+批量处理 API
+"""
+
+from fastapi import APIRouter, BackgroundTasks, HTTPException
+from pydantic import BaseModel, Field
+from typing import List, Optional, Dict, Any
+from pathlib import Path
+from loguru import logger
+
+from services.batch_service import BatchProcessor
+from models.schemas import TableStructure, ImageSize
+
+
+router = APIRouter(prefix="/api/batch", tags=["batch"])
+
+
+class FilePair(BaseModel):
+    """A JSON file path paired with an image file path."""
+    json_path: str = Field(description="JSON 文件路径")
+    image_path: str = Field(description="图片文件路径")
+
+
+class BatchProcessRequest(BaseModel):
+    """Request body for batch processing."""
+    # Name of the template to apply.
+    template_name: str = Field(description="要应用的模板名称")
+    # List of JSON/image file pairs.
+    file_pairs: List[FilePair] = Field(description="文件对列表")
+    # Output directory.
+    output_dir: str = Field(description="输出目录")
+    # Whether to process in parallel.
+    parallel: bool = Field(default=True, description="是否并行处理")
+    # Whether to adaptively adjust the row splits.
+    adjust_rows: bool = Field(default=True, description="是否自适应调整行分割")
+    # Suffix of the structure file.
+    structure_suffix: str = Field(default="_structure.json", description="结构文件后缀")
+    # Suffix of the output image file.
+    image_suffix: str = Field(default="_with_lines.png", description="输出图片文件后缀")
+
+
+class BatchProcessResult(BaseModel):
+    """Processing result for a single file."""
+    # NOTE(review): the response models visible in this module carry their
+    # results as plain dicts rather than as instances of this model.
+    success: bool
+    json_path: str
+    image_path: str
+    structure_path: Optional[str] = None
+    filename: str
+    rows: Optional[int] = None
+    cols: Optional[int] = None
+    error: Optional[str] = None
+
+
+class BatchProcessResponse(BaseModel):
+    """Response body for batch processing."""
+    success: bool
+    total: int
+    processed: int
+    failed: int
+    # Per-file results, carried as plain dicts.
+    results: List[Dict[str, Any]]
+    message: Optional[str] = None
+
+
+class DrawBatchRequest(BaseModel):
+    """Request body for batch drawing."""
+    # List of processing results.
+    results: List[Dict[str, Any]] = Field(description="处理结果列表")
+    # Line width.
+    line_width: int = Field(default=2, description="线条宽度")
+    # Line colour as RGB.
+    line_color: List[int] = Field(default=[0, 0, 0], description="线条颜色 RGB")
+
+
+class DrawBatchResponse(BaseModel):
+    """Response body for batch drawing."""
+    success: bool
+    total: int
+    drawn: int
+    # Per-image drawing results, carried as plain dicts.
+    results: List[Dict[str, Any]]
+    message: Optional[str] = None
+
+
+@router.post("/process", response_model=BatchProcessResponse)
+async def batch_process(request: BatchProcessRequest):
+    """
+    批量处理文件
+    
+    将指定模板的结构应用到多个文件:
+    - 复用列边界(竖线)
+    - 自适应调整行分割(横线)
+    - 支持并行处理
+    - 自动处理不同图片尺寸的坐标映射
+    """
+    try:
+        # 验证输出目录
+        output_path = Path(request.output_dir)
+        if not output_path.parent.exists():
+            raise HTTPException(
+                status_code=400, 
+                detail=f"输出目录的父目录不存在: {output_path.parent}"
+            )
+        
+        # 转换文件对格式
+        file_pairs = [
+            {
+                'json_path': pair.json_path,
+                'image_path': pair.image_path
+            }
+            for pair in request.file_pairs
+        ]
+        
+        # 创建处理器
+        processor = BatchProcessor(max_workers=4)
+        
+        # 执行批量处理
+        summary = processor.process_batch_from_data_source(
+            template_name=request.template_name,
+            file_pairs=file_pairs,
+            output_dir=request.output_dir,
+            parallel=request.parallel,
+            adjust_rows=request.adjust_rows,
+            structure_suffix=request.structure_suffix,
+            image_suffix=request.image_suffix
+        )
+        
+        return BatchProcessResponse(
+            success=True,
+            total=summary['total'],
+            processed=summary['success'],
+            failed=summary['failed'],
+            results=summary['results'],
+            message=f"批量处理完成: 成功 {summary['success']}/{summary['total']}"
+        )
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception(f"批量处理失败: {e}")
+        raise HTTPException(status_code=500, detail=f"批量处理失败: {e}")
+
+
+@router.post("/draw", response_model=DrawBatchResponse)
+async def batch_draw(request: DrawBatchRequest):
+    """
+    批量绘制表格线到图片上
+    
+    根据处理结果中的结构文件,在原图上绘制表格线
+    """
+    try:
+        processor = BatchProcessor()
+        
+        # 确保颜色是 RGB 三元组
+        line_color = tuple(request.line_color[:3]) if len(request.line_color) >= 3 else (0, 0, 0)
+        
+        draw_results = processor.draw_batch_images(
+            results=request.results,
+            line_width=request.line_width,
+            line_color=line_color  # type: ignore
+        )
+        
+        success_count = sum(1 for r in draw_results if r.get('success'))
+        
+        return DrawBatchResponse(
+            success=True,
+            total=len(request.results),
+            drawn=success_count,
+            results=draw_results,
+            message=f"绘制完成: {success_count}/{len(request.results)} 张图片"
+        )
+        
+    except Exception as e:
+        logger.exception(f"批量绘图失败: {e}")
+        raise HTTPException(status_code=500, detail=f"批量绘图失败: {e}")
+
+
+@router.get("/health")
+async def batch_health_check():
+    """批量处理服务健康检查"""
+    return {
+        "status": "ok",
+        "service": "batch-processor"
+    }

+ 143 - 0
table_line_generator/backend/api/config.py

@@ -0,0 +1,143 @@
+"""
+配置 API 路由
+提供数据源配置的读取和管理
+"""
+
+from fastapi import APIRouter, HTTPException
+from pathlib import Path
+from typing import List, Dict, Any
+import yaml
+from loguru import logger
+
+from models.schemas import HealthResponse
+
+
+router = APIRouter(prefix="/api/config", tags=["config"])
+
+
+def _get_config_dir() -> Path:
+    """获取配置目录路径"""
+    return Path(__file__).parent.parent / "config" / "data_sources"
+
+
+def _resolve_template(template: str, variables: Dict[str, str]) -> str:
+    """解析模板变量"""
+    resolved = template
+    for key, value in variables.items():
+        resolved = resolved.replace(f"{{{{{key}}}}}", value)
+        resolved = resolved.replace(f"{{{{ {key} }}}}", value)
+    return resolved
+
+
+def _parse_data_source(config: Dict[str, Any]) -> Dict[str, Any]:
+    """解析数据源配置,展开模板变量"""
+    if 'data_source' not in config:
+        return config
+    
+    ds = config['data_source']
+    
+    # 准备变量字典
+    variables = {
+        'name': ds.get('name', ''),
+        'base_dir': ds.get('base_dir', '')
+    }
+    
+    # 解析路径模板
+    if 'json_dir' in ds:
+        ds['json_dir'] = _resolve_template(ds['json_dir'], variables)
+    if 'image_dir' in ds:
+        ds['image_dir'] = _resolve_template(ds['image_dir'], variables)
+    if 'output' in ds and 'directory' in ds['output']:
+        ds['output']['directory'] = _resolve_template(ds['output']['directory'], variables)
+    
+    return ds
+
+
+@router.get("/data-sources")
+async def list_data_sources() -> Dict[str, List[Dict[str, Any]]]:
+    """
+    列出所有预定义数据源
+    
+    Returns:
+        包含 sources 列表的字典,每个数据源包含 filename 字段(不含 .yaml 扩展名)
+    """
+    config_dir = _get_config_dir()
+    sources = []
+    
+    try:
+        if not config_dir.exists():
+            logger.warning(f"配置目录不存在: {config_dir}")
+            return {"sources": []}
+        
+        for yaml_file in sorted(config_dir.glob("*.yaml")):
+            if yaml_file.name == "README.md":
+                continue
+                
+            try:
+                with open(yaml_file, 'r', encoding='utf-8') as f:
+                    config = yaml.safe_load(f)
+                
+                if config and 'data_source' in config:
+                    parsed = _parse_data_source(config)
+                    # 添加文件名作为唯一标识符(不含 .yaml 扩展名)
+                    filename = yaml_file.stem  # 获取不含扩展名的文件名
+                    parsed['filename'] = filename
+                    sources.append(parsed)
+                    logger.debug(f"加载数据源配置: {yaml_file.name} (filename: {filename})")
+            except Exception as e:
+                logger.error(f"加载配置文件失败 {yaml_file}: {e}")
+                continue
+        
+        logger.info(f"成功加载 {len(sources)} 个数据源配置")
+        return {"sources": sources}
+        
+    except Exception as e:
+        logger.exception(f"列出数据源失败: {e}")
+        raise HTTPException(status_code=500, detail=f"列出数据源失败: {e}")
+
+
+@router.get("/data-sources/{filename}")
+async def get_data_source(filename: str) -> Dict[str, Any]:
+    """
+    获取指定数据源配置
+    
+    Args:
+        filename: 数据源配置文件名(不含 .yaml 扩展名)
+        
+    Returns:
+        数据源配置
+    """
+    config_dir = _get_config_dir()
+    
+    try:
+        # 直接使用文件名查找配置文件
+        yaml_file = config_dir / f"{filename}.yaml"
+        
+        if not yaml_file.exists():
+            raise HTTPException(status_code=404, detail=f"数据源配置文件不存在: {filename}.yaml")
+        
+        with open(yaml_file, 'r', encoding='utf-8') as f:
+            config = yaml.safe_load(f)
+        
+        if config and 'data_source' in config:
+            parsed = _parse_data_source(config)
+            # 添加文件名作为唯一标识符
+            parsed['filename'] = filename
+            return parsed
+        else:
+            raise HTTPException(status_code=404, detail=f"配置文件格式错误: {filename}.yaml")
+        
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.exception(f"获取数据源失败: {e}")
+        raise HTTPException(status_code=500, detail=f"获取数据源失败: {e}")
+
+
+@router.get("/health", response_model=HealthResponse)
+async def config_health_check():
+    """配置服务健康检查"""
+    return HealthResponse(
+        status="ok",
+        service="config"
+    )

+ 212 - 0
table_line_generator/backend/api/directory.py

@@ -0,0 +1,212 @@
+"""
+目录浏览 API 路由
+提供文件系统浏览和图片+JSON文件扫描功能
+"""
+import os
+import re
+from pathlib import Path
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+from typing import List, Optional, Dict
+
+from models.schemas import HomeDirectoryResponse
+
+
+router = APIRouter(prefix="/api/directories", tags=["directories"])
+
+
+class DirectoryItem(BaseModel):
+    """One entry of a directory listing."""
+    name: str
+    path: str
+    is_dir: bool
+    # File size; defaults to None.
+    size: Optional[int] = None
+
+
+class BrowseResponse(BaseModel):
+    """Response body for directory browsing."""
+    current_path: str
+    parent_path: Optional[str]
+    items: List[DirectoryItem]
+
+
+class FilePair(BaseModel):
+    """An image file paired with its JSON file."""
+    index: int
+    image_path: str
+    image_name: str
+    json_path: str
+    json_name: str
+    has_structure: bool = False  # whether a structure file already exists
+    structure_path: Optional[str] = None
+
+
+class ScanRequest(BaseModel):
+    """Request body for a directory scan."""
+    image_dir: str
+    json_dir: str
+    # Regular expressions applied to file names.
+    image_pattern: Optional[str] = r".*\.(png|jpg|jpeg)$"
+    json_pattern: Optional[str] = r".*\.json$"
+    output_dir: Optional[str] = None  # used to check for existing structure files
+
+
+class ScanResponse(BaseModel):
+    """Response body for a directory scan."""
+    total: int
+    pairs: List[FilePair]
+    labeled_count: int  # number of pairs that are already labelled
+
+
+@router.get("/browse", response_model=BrowseResponse)
+async def browse_directory(
+    path: str = Query(default="~", description="要浏览的目录路径"),
+    show_hidden: bool = Query(default=False, description="是否显示隐藏文件")
+):
+    """
+    浏览目录结构
+    
+    Args:
+        path: 目录路径,支持 ~ 表示用户主目录
+        show_hidden: 是否显示隐藏文件(以.开头的文件)
+    """
+    # 展开用户目录
+    if path.startswith("~"):
+        path = os.path.expanduser(path)
+    
+    target_path = Path(path).resolve()
+    
+    if not target_path.exists():
+        raise HTTPException(status_code=404, detail=f"目录不存在: {path}")
+    
+    if not target_path.is_dir():
+        raise HTTPException(status_code=400, detail=f"不是目录: {path}")
+    
+    items = []
+    try:
+        for item in sorted(target_path.iterdir()):
+            # 跳过隐藏文件
+            if not show_hidden and item.name.startswith('.'):
+                continue
+            
+            try:
+                is_dir = item.is_dir()
+                size = None
+                if not is_dir:
+                    try:
+                        size = item.stat().st_size
+                    except:
+                        pass
+                
+                items.append(DirectoryItem(
+                    name=item.name,
+                    path=str(item),
+                    is_dir=is_dir,
+                    size=size
+                ))
+            except PermissionError:
+                continue
+    except PermissionError:
+        raise HTTPException(status_code=403, detail=f"无权限访问目录: {path}")
+    
+    # 目录排在前面
+    items.sort(key=lambda x: (not x.is_dir, x.name.lower()))
+    
+    # 计算父目录
+    parent = target_path.parent
+    parent_path = str(parent) if parent != target_path else None
+    
+    return BrowseResponse(
+        current_path=str(target_path),
+        parent_path=parent_path,
+        items=items
+    )
+
+
+@router.post("/scan", response_model=ScanResponse)
+async def scan_directory(request: ScanRequest):
+    """
+    扫描目录下的图片+JSON文件对
+    
+    扫描 image_dir 中匹配 image_pattern 的图片,
+    然后在 json_dir 中查找对应的 JSON 文件。
+    """
+    image_dir = Path(request.image_dir)
+    json_dir = Path(request.json_dir)
+    
+    if not image_dir.exists():
+        raise HTTPException(status_code=404, detail=f"图片目录不存在: {request.image_dir}")
+    
+    if not json_dir.exists():
+        raise HTTPException(status_code=404, detail=f"JSON目录不存在: {request.json_dir}")
+    
+    # 编译正则表达式
+    try:
+        image_re = re.compile(request.image_pattern or r".*\.(png|jpg|jpeg)$", re.IGNORECASE)
+        json_re = re.compile(request.json_pattern or r".*\.json$", re.IGNORECASE)
+    except re.error as e:
+        raise HTTPException(status_code=400, detail=f"无效的正则表达式: {e}")
+    
+    # 扫描图片文件
+    image_files = []
+    for f in image_dir.iterdir():
+        if f.is_file() and image_re.match(f.name):
+            image_files.append(f)
+    
+    # 按名称排序
+    image_files.sort(key=lambda x: x.name)
+    
+    # 构建文件对
+    pairs = []
+    labeled_count = 0
+    output_dir = Path(request.output_dir) if request.output_dir else None
+    
+    for idx, image_file in enumerate(image_files):
+        # 查找对应的 JSON 文件
+        base_name = image_file.stem
+        json_file = None
+        
+        # 尝试多种匹配方式
+        for jf in json_dir.iterdir():
+            if jf.is_file() and json_re.match(jf.name):
+                # 完全匹配
+                if jf.stem == base_name:
+                    json_file = jf
+                    break
+                # 前缀匹配
+                if jf.stem.startswith(base_name) or base_name.startswith(jf.stem):
+                    json_file = jf
+                    break
+        
+        if json_file:
+            # 检查是否已有结构文件
+            has_structure = False
+            structure_path = None
+            if output_dir and output_dir.exists():
+                structure_file = output_dir / f"{base_name}_structure.json"
+                if structure_file.exists():
+                    has_structure = True
+                    structure_path = str(structure_file)
+                    labeled_count += 1
+            
+            pairs.append(FilePair(
+                index=idx + 1,
+                image_path=str(image_file),
+                image_name=image_file.name,
+                json_path=str(json_file),
+                json_name=json_file.name,
+                has_structure=has_structure,
+                structure_path=structure_path
+            ))
+    
+    return ScanResponse(
+        total=len(pairs),
+        pairs=pairs,
+        labeled_count=labeled_count
+    )
+
+
+@router.get("/home", response_model=HomeDirectoryResponse)
+async def get_home_directory():
+    """获取用户主目录"""
+    home = os.path.expanduser("~")
+    return HomeDirectoryResponse(path=home)

+ 207 - 0
table_line_generator/backend/api/editor.py

@@ -0,0 +1,207 @@
+"""
+编辑器 API 路由
+"""
+
+import json
+from fastapi import APIRouter, UploadFile, File, HTTPException
+from loguru import logger
+
+from models.schemas import (
+    UploadResponse,
+    AnalyzeRequest,
+    AnalyzeResponse,
+    SaveRequest,
+    SaveResponse,
+    TableStructure,
+    ImageSize,
+    LoadByPathRequest,
+    HealthResponse,
+)
+from services.editor_service import EditorService
+
+
+router = APIRouter(prefix="/api", tags=["editor"])
+
+
+@router.post("/upload", response_model=UploadResponse)
+async def upload_files(
+    json_file: UploadFile = File(..., description="OCR JSON 文件"),
+    image_file: UploadFile = File(..., description="图片文件")
+):
+    """
+    上传 OCR JSON 和图片文件,返回分析结果
+    
+    - 自动检测 OCR 格式(PPStructure / MinerU)
+    - 图片超过 4096x4096 会自动缩放
+    - 返回 base64 编码的图片和表格结构
+    """
+    try:
+        # 验证文件类型
+        if not json_file.filename.endswith('.json'):
+            raise HTTPException(status_code=400, detail="请上传 JSON 文件")
+        
+        allowed_image_types = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}
+        image_ext = '.' + image_file.filename.split('.')[-1].lower()
+        if image_ext not in allowed_image_types:
+            raise HTTPException(status_code=400, detail=f"不支持的图片格式: {image_ext}")
+        
+        # 读取文件内容
+        json_content = await json_file.read()
+        image_content = await image_file.read()
+        
+        logger.info(f"收到上传: JSON={json_file.filename}, Image={image_file.filename}")
+        
+        # 处理上传,从上传的文件名中提取建议的文件名
+        result = EditorService.process_upload(
+            json_content, 
+            image_content,
+            json_path=json_file.filename
+        )
+        
+        return UploadResponse(
+            success=True,
+            image_base64=result['image_base64'],
+            structure=TableStructure(**result['structure']),
+            image_size=ImageSize(**result['image_size']),
+            scale_factor=result['scale_factor'],
+            ocr_data=result['ocr_data'],
+            suggested_filename=result.get('suggested_filename'),
+            message="上传成功"
+        )
+        
+    except ValueError as e:
+        logger.error(f"上传处理失败: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.exception(f"上传处理异常: {e}")
+        raise HTTPException(status_code=500, detail=f"服务器错误: {e}")
+
+
+@router.post("/analyze", response_model=AnalyzeResponse)
+async def analyze_structure(request: AnalyzeRequest):
+    """
+    重新分析表格结构(使用不同参数)
+    """
+    try:
+        structure = EditorService.analyze_structure(
+            request.ocr_data,
+            request.params.model_dump()
+        )
+        
+        return AnalyzeResponse(
+            success=True,
+            structure=TableStructure(**structure),
+            message="分析完成"
+        )
+        
+    except Exception as e:
+        logger.exception(f"分析失败: {e}")
+        raise HTTPException(status_code=500, detail=f"分析失败: {e}")
+
+
+@router.post("/save", response_model=SaveResponse)
+async def save_result(request: SaveRequest):
+    """
+    保存结果(结构 JSON + 可选的带线图片)
+    """
+    try:
+        paths = EditorService.save_result(
+            structure=request.structure.model_dump(),
+            image_base64=request.image_base64,
+            output_dir=request.output_dir,
+            filename=request.filename,
+            image_filename=request.image_filename,
+            overwrite_mode=request.overwrite_mode,
+            structure_suffix=request.structure_suffix,
+            image_suffix=request.image_suffix,
+            line_width=request.line_width,
+            line_color=(request.line_color[0], request.line_color[1], request.line_color[2])
+        )
+        
+        return SaveResponse(
+            success=True,
+            structure_path=paths['structure_path'],
+            image_path=paths['image_path'],
+            message="保存成功"
+        )
+        
+    except Exception as e:
+        logger.exception(f"保存失败: {e}")
+        raise HTTPException(status_code=500, detail=f"保存失败: {e}")
+
+
+@router.get("/health", response_model=HealthResponse)
+async def health_check():
+    """健康检查"""
+    return HealthResponse(status="ok", service="table-line-editor")
+
+
+@router.post("/load-by-path", response_model=UploadResponse)
+async def load_by_path(request: LoadByPathRequest):
+    """按路径加载数据(优先加载标注结果)"""
+    from pathlib import Path
+    
+    image_path = Path(request.image_path)
+    json_path = Path(request.json_path)
+    output_dir = Path(request.output_dir) if request.output_dir else None
+    
+    try:
+        if not image_path.exists():
+            raise HTTPException(status_code=404, detail=f"图片文件不存在: {image_path}")
+        
+        if not json_path.exists():
+            raise HTTPException(status_code=404, detail=f"JSON 文件不存在: {json_path}")
+        
+        # 使用来自前端的后缀,不再读配置文件
+        base_name = json_path.stem
+        # 确保 structure_suffix 以 .json 结尾
+        structure_suffix = request.structure_suffix
+        if not structure_suffix.endswith('.json'):
+            structure_suffix = structure_suffix + '.json'
+        structure_path = output_dir / f"{base_name}{structure_suffix}" if output_dir else None
+        
+        with open(json_path, 'rb') as f:
+            json_content = f.read()
+        with open(image_path, 'rb') as f:
+            image_content = f.read()
+        
+        # 如果存在标注结果,优先加载
+        if structure_path and structure_path.exists():
+            logger.info(f"找到标注结果: {structure_path}")
+            with open(structure_path, 'r', encoding='utf-8') as f:
+                structure_data = json.load(f)
+            
+            result = EditorService.process_upload(
+                json_content, 
+                image_content, 
+                json_path=str(json_path),
+                annotated_structure=structure_data
+            )
+        else:
+            logger.info(f"未找到标注结果,使用原始OCR数据")
+            result = EditorService.process_upload(
+                json_content, 
+                image_content, 
+                json_path=str(json_path)
+            )
+        
+        return UploadResponse(
+            success=True,
+            image_base64=result['image_base64'],
+            structure=TableStructure(**result['structure']),
+            image_size=ImageSize(**result['image_size']),
+            scale_factor=result['scale_factor'],
+            ocr_data=result['ocr_data'],
+            suggested_filename=result.get('suggested_filename'),
+            message="加载成功"
+        )
+        
+    except HTTPException:
+        raise
+    except ValueError as e:
+        logger.error(f"加载处理失败: {e}")
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        logger.exception(f"加载处理异常: {e}")
+        raise HTTPException(status_code=500, detail=f"服务器错误: {e}")
+

+ 157 - 0
table_line_generator/backend/api/template.py

@@ -0,0 +1,157 @@
+"""
+模板 API 路由
+"""
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import Optional, List, Dict
+
+from services.template_service import get_template_service
+from models.schemas import (
+    CreateTemplateResponse,
+    GetTemplateResponse,
+    TemplateApplyResponse,
+    DeleteTemplateResponse,
+)
+
+
+router = APIRouter(prefix="/api/templates", tags=["templates"])
+
+
+class CreateTemplateRequest(BaseModel):
+    """Request body for creating a template."""
+    name: str
+    structure: Dict
+    image_size: Dict
+    description: Optional[str] = ""
+
+
+class PreviewApplyRequest(BaseModel):
+    """Request body for previewing the application of a template."""
+    target_image_size: Dict
+    target_table_bbox: Optional[List] = None  # table_bbox of the target page: [x1, y1, x2, y2]
+    mode: Optional[str] = "relative"  # "relative" or "absolute"
+
+
+class ApplyTemplateRequest(BaseModel):
+    """Request body for applying a template."""
+    target_image_size: Dict
+    target_table_bbox: Optional[List] = None  # table_bbox of the target page: [x1, y1, x2, y2]
+    mode: Optional[str] = "relative"
+
+
+class TemplateInfo(BaseModel):
+    """Template metadata."""
+    name: str
+    description: Optional[str] = ""
+    created_at: Optional[str] = ""
+    stats: Optional[Dict] = {}
+    source_image_size: Optional[Dict] = {}
+
+
+class TemplateListResponse(BaseModel):
+    """Response body for the template list."""
+    # Templates are carried as plain dicts.
+    templates: List[Dict]
+    total: int
+
+
+@router.get("", response_model=TemplateListResponse)
+async def list_templates():
+    """列出所有模板"""
+    service = get_template_service()
+    templates = service.list_templates()
+    return TemplateListResponse(
+        templates=templates,
+        total=len(templates)
+    )
+
+
+@router.post("", response_model=CreateTemplateResponse)
+async def create_template(request: CreateTemplateRequest):
+    """从当前结构创建模板"""
+    service = get_template_service()
+    try:
+        result = service.create_template(
+            name=request.name,
+            structure=request.structure,
+            image_size=request.image_size,
+            description=request.description or ""
+        )
+        return CreateTemplateResponse(
+            success=True,
+            data=result,
+            message="模板创建成功"
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"创建模板失败: {str(e)}")
+
+
+@router.get("/{name}", response_model=GetTemplateResponse)
+async def get_template(name: str):
+    """获取模板详情"""
+    service = get_template_service()
+    template = service.get_template(name)
+    if not template:
+        raise HTTPException(status_code=404, detail=f"模板 '{name}' 不存在")
+    return GetTemplateResponse(
+        success=True,
+        data=template,
+        message="获取成功"
+    )
+
+
+@router.delete("/{name}", response_model=DeleteTemplateResponse)
+async def delete_template(name: str):
+    """删除模板"""
+    service = get_template_service()
+    if service.delete_template(name):
+        return DeleteTemplateResponse(
+            success=True,
+            message=f"模板 '{name}' 已删除"
+        )
+    raise HTTPException(status_code=404, detail=f"模板 '{name}' 不存在")
+
+
+@router.post("/{name}/preview", response_model=TemplateApplyResponse)
+async def preview_apply(name: str, request: PreviewApplyRequest):
+    """预览模板应用效果"""
+    service = get_template_service()
+    try:
+        structure = service.preview_apply(
+            template_name=name,
+            target_image_size=request.target_image_size,
+            target_table_bbox=request.target_table_bbox,
+            mode=request.mode or "relative"
+        )
+        return TemplateApplyResponse(
+            success=True,
+            data=structure,
+            message="预览成功"
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"预览失败: {str(e)}")
+
+
+@router.post("/{name}/apply", response_model=TemplateApplyResponse)
+async def apply_template(name: str, request: ApplyTemplateRequest):
+    """确认应用模板"""
+    service = get_template_service()
+    try:
+        structure = service.apply_template(
+            template_name=name,
+            target_image_size=request.target_image_size,
+            target_table_bbox=request.target_table_bbox,
+            mode=request.mode or "relative"
+        )
+        return TemplateApplyResponse(
+            success=True,
+            data=structure,
+            message="应用成功"
+        )
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"应用模板失败: {str(e)}")

+ 21 - 0
table_line_generator/backend/config/data_sources/B用户_扫描流水.yaml

@@ -0,0 +1,21 @@
+data_source:
+  name: "B用户_扫描流水"
+  description: "B用户_扫描流水 MinerU + PaddleOCR 坐标"
+  json_format: "mineru"
+  base_dir: "/Users/zhch158/workspace/data/流水分析"
+  json_dir: "{{name}}/mineru_vllm_results_cell_bbox"
+  image_dir: "{{name}}/mineru_vllm_results/{{name}}"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  # 批量处理配置
+  batch:
+    enabled: true
+    template_name: null  # 可选:指定默认模板
+    auto_apply: false    # 是否自动应用模板
+    parallel_workers: 4  # 并行处理线程数

+ 42 - 0
table_line_generator/backend/config/data_sources/README.md

@@ -0,0 +1,42 @@
+# 数据源配置说明
+
+本目录存放 Table Line Editor Vue 的数据源配置文件。
+
+## 配置文件格式
+
+每个数据源一个 YAML 文件,格式如下:
+
+```yaml
+data_source:
+  name: "数据源名称"
+  description: "数据源描述"
+  json_format: "mineru"  # 或 "ppstructure"
+  base_dir: "/path/to/base"
+  json_dir: "{{name}}/ocr_results"
+  image_dir: "{{name}}/images"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  batch:
+    enabled: true
+    template_name: null  # 可选:默认模板名称
+    auto_apply: false
+    parallel_workers: 4
+```
+
+## 模板变量
+
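+支持以下模板变量(`{{name}}` 与 `{{ name }}` 两种写法等价):
+- `{{name}}`: 数据源名称(取自 `data_source.name`)
+- `{{base_dir}}`: 基础目录路径(取自 `data_source.base_dir`)
+
+`/api/config/data-sources` 接口返回配置时,会展开 `json_dir`、`image_dir` 和 `output.directory` 中的模板变量。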
+
+## 使用方式
+
+1. **手动创建配置文件**:参考示例文件创建新的数据源配置
+2. **通过 API 读取**:前端通过 `/api/config/data-sources` 获取所有数据源
+3. **应用配置**:选择数据源后,自动设置扫描目录和输出目录

+ 21 - 0
table_line_generator/backend/config/data_sources/康强_北京农村商业银行_mineru.yaml

@@ -0,0 +1,21 @@
+data_source:
+  name: "康强_北京农村商业银行"
+  description: "康强_北京农村商业银行 MinerU + PaddleOCR 坐标"
+  json_format: "mineru"
+  base_dir: "/Users/zhch158/workspace/data/流水分析"
+  json_dir: "{{name}}/mineru_vllm_results_cell_bbox"
+  image_dir: "{{name}}/mineru_vllm_results/{{name}}"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  # 批量处理配置
+  batch:
+    enabled: true
+    template_name: null
+    auto_apply: false
+    parallel_workers: 4

+ 21 - 0
table_line_generator/backend/config/data_sources/康强_北京农村商业银行_ppstructure.yaml

@@ -0,0 +1,21 @@
+data_source:
+  name: "康强_北京农村商业银行"
+  description: "康强_北京农村商业银行 PPStructure 坐标"
+  json_format: "ppstructure"
+  base_dir: "/Users/zhch158/workspace/data/流水分析"
+  json_dir: "{{name}}/ppstructurev3_client_results"
+  image_dir: "{{name}}/ppstructurev3_client_results/{{name}}"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.clustered_wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  # 批量处理配置
+  batch:
+    enabled: true
+    template_name: null
+    auto_apply: false
+    parallel_workers: 4

+ 21 - 0
table_line_generator/backend/config/data_sources/康强_北京农村商业银行_yusys_ocr.yaml

@@ -0,0 +1,21 @@
+data_source:
+  name: "康强_北京农村商业银行"
+  description: "康强_北京农村商业银行 yusys_ocr 坐标"
+  json_format: "mineru"
+  base_dir: "/Users/zhch158/workspace/data/流水分析"
+  json_dir: "{{name}}/bank_statement_yusys_v2"
+  image_dir: "{{name}}/mineru_vllm_results/{{name}}"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  # 批量处理配置
+  batch:
+    enabled: true
+    template_name: null
+    auto_apply: false
+    parallel_workers: 4

+ 21 - 0
table_line_generator/backend/config/data_sources/施博深.yaml

@@ -0,0 +1,21 @@
+data_source:
+  name: "施博深"
+  description: "施博深 YUSYS统一OCR框架"
+  json_format: "mineru"
+  base_dir: "/Users/zhch158/workspace/data/流水分析"
+  json_dir: "{{name}}/bank_statement_yusys_v2"
+  image_dir: "{{name}}/mineru_vllm_results/{{name}}"
+  json_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.json"
+  image_pattern: "{{ name }}_page_(?P<page>\\d{3})\\.png"
+  sort_key: "page"
+  output:
+    directory: "{{base_dir}}/{{ name }}.wiredtable"
+    structure_suffix: "_structure.json"
+    image_suffix: ".png"
+  
+  # 批量处理配置
+  batch:
+    enabled: true
+    template_name: null  # 可选:指定默认模板
+    auto_apply: false    # 是否自动应用模板
+    parallel_workers: 4  # 并行处理线程数

+ 92 - 0
table_line_generator/backend/main.py

@@ -0,0 +1,92 @@
+"""
+表格线编辑器后端服务
+FastAPI 应用入口
+"""
+
+import sys
+from pathlib import Path
+
+# Put the ocr_platform root directory on sys.path (must run before the project imports below)
+_file_path = Path(__file__).resolve()
+ocr_platform_root = _file_path.parents[2]  # main.py -> backend -> table_line_generator -> ocr_platform
+if str(ocr_platform_root) not in sys.path:
+    sys.path.insert(0, str(ocr_platform_root))
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from loguru import logger
+
+# Logging setup: remove the existing loguru handlers and log INFO and above to stdout
+logger.remove()
+logger.add(
+    sys.stdout,
+    level="INFO",
+    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"
+)
+
+# Create the FastAPI application (Swagger UI at /docs, ReDoc at /redoc)
+app = FastAPI(
+    title="表格线编辑器 API",
+    description="无线表格智能标注工具后端服务",
+    version="0.1.0",
+    docs_url="/docs",
+    redoc_url="/redoc"
+)
+
+# CORS: only the local front-end development origins listed here are allowed
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "http://localhost:5173",  # Vite dev server
+        "http://localhost:3000",  # alternate port
+        "http://127.0.0.1:5173",
+        "http://127.0.0.1:3000",
+    ],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Register the API routers (imported here, after the sys.path setup and app creation above)
+from api.editor import router as editor_router
+from api.template import router as template_router
+from api.directory import router as directory_router
+from api.config import router as config_router
+from api.batch import router as batch_router
+
+app.include_router(editor_router)
+app.include_router(template_router)
+app.include_router(directory_router)
+app.include_router(config_router)
+app.include_router(batch_router)
+
+
+@app.get("/")
+async def root():
+    """Root endpoint: return the service name, version and docs path."""
+    return {
+        "service": "table-line-editor",
+        "version": "0.1.0",
+        "docs": "/docs"
+    }
+
+
+@app.on_event("startup")
+async def startup_event():
+    """Log a startup banner and the (hard-coded localhost:8000) docs URL."""
+    logger.info("🚀 表格线编辑器后端服务启动")
+    logger.info("📚 API 文档: http://localhost:8000/docs")
+
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Log a message when the application shuts down."""
+    logger.info("👋 表格线编辑器后端服务关闭")
+
+
+# Script entry point: serve the app with uvicorn on all interfaces, port 8000, with auto-reload.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True
+    )

+ 27 - 0
table_line_generator/backend/models/__init__.py

@@ -0,0 +1,27 @@
+"""
+Backend models package
+"""
+
+from .schemas import (
+    TableStructure,
+    ImageSize,
+    UploadResponse,
+    AnalyzeParams,
+    AnalyzeRequest,
+    AnalyzeResponse,
+    SaveRequest,
+    SaveResponse,
+    ErrorResponse,
+)
+
+__all__ = [
+    'TableStructure',
+    'ImageSize',
+    'UploadResponse',
+    'AnalyzeParams',
+    'AnalyzeRequest',
+    'AnalyzeResponse',
+    'SaveRequest',
+    'SaveResponse',
+    'ErrorResponse',
+]

+ 188 - 0
table_line_generator/backend/models/schemas.py

@@ -0,0 +1,188 @@
+"""
+Pydantic 数据模型定义
+"""
+
+from pydantic import BaseModel, Field
+from typing import List, Dict, Optional, Any
+
+
+class TableStructure(BaseModel):
+    """Table structure: grid-line coordinates plus layout and rotation metadata."""
+    # Y coordinates of horizontal lines / X coordinates of vertical lines
+    horizontal_lines: List[int] = Field(default_factory=list, description="横线Y坐标列表")
+    vertical_lines: List[int] = Field(default_factory=list, description="竖线X坐标列表")
+    # Table bounding box as [x1, y1, x2, y2]
+    table_bbox: Optional[List[int]] = Field(default=None, description="表格边界框 [x1, y1, x2, y2]")
+    # Standard row height, per-column widths, and row/column counts
+    row_height: Optional[int] = Field(default=None, description="标准行高")
+    col_widths: Optional[List[int]] = Field(default=None, description="列宽列表")
+    total_rows: Optional[int] = Field(default=None, description="总行数")
+    total_cols: Optional[int] = Field(default=None, description="总列数")
+    # Analysis mode label
+    mode: Optional[str] = Field(default="hybrid", description="分析模式")
+    # Indices of horizontal / vertical lines that were modified
+    modified_h_lines: List[int] = Field(default_factory=list, description="被修改的横线索引")
+    modified_v_lines: List[int] = Field(default_factory=list, description="被修改的竖线索引")
+    # Image rotation angle, skew angle, and whether skew has been corrected
+    image_rotation_angle: float = Field(default=0.0, description="图片旋转角度")
+    skew_angle: float = Field(default=0.0, description="倾斜角度")
+    is_skew_corrected: bool = Field(default=False, description="是否已校正倾斜")
+
+
+class ImageSize(BaseModel):
+    """Image dimensions (width and height)."""
+    width: int
+    height: int
+
+
+class UploadResponse(BaseModel):
+    """Response returned after an upload."""
+    success: bool
+    # Base64-encoded image
+    image_base64: str = Field(description="Base64 编码的图片")
+    structure: TableStructure = Field(description="表格结构")
+    image_size: ImageSize = Field(description="图片尺寸")
+    # Scale factor; 1.0 means the image was not scaled
+    scale_factor: float = Field(default=1.0, description="缩放比例 (1.0 表示未缩放)")
+    # Raw OCR data, kept so the client can request a re-analysis
+    ocr_data: Optional[Dict[str, Any]] = Field(default=None, description="原始 OCR 数据 (用于重新分析)")
+    suggested_filename: Optional[str] = Field(default=None, description="建议的文件名")
+    message: Optional[str] = None
+
+
+class AnalyzeParams(BaseModel):
+    """Analysis parameters (each numeric field is range-validated via ge/le)."""
+    # Clustering tolerance along the Y axis (1..50)
+    y_tolerance: int = Field(default=5, ge=1, le=50, description="Y轴聚类容差")
+    # Clustering tolerance along the X axis (1..100)
+    x_tolerance: int = Field(default=10, ge=1, le=100, description="X轴聚类容差")
+    # Minimum row height (5..100)
+    min_row_height: int = Field(default=20, ge=5, le=100, description="最小行高")
+    # Analysis method; the description lists auto/cluster/mineru, but the value is not validated here
+    method: str = Field(default="auto", description="分析方法: auto/cluster/mineru")
+
+
+class AnalyzeRequest(BaseModel):
+    """Re-analysis request: OCR data plus analysis parameters."""
+    ocr_data: Dict[str, Any] = Field(description="OCR 数据")
+    # Defaults to a fresh AnalyzeParams() when omitted
+    params: AnalyzeParams = Field(default_factory=AnalyzeParams)
+
+
+class AnalyzeResponse(BaseModel):
+    """Analysis response carrying the resulting table structure."""
+    success: bool
+    structure: TableStructure
+    message: Optional[str] = None
+
+
+class SaveRequest(BaseModel):
+    """Save request: the structure to persist plus output naming and drawing options."""
+    structure: TableStructure = Field(description="表格结构")
+    # Base64-encoded original image; optional (per the description, not needed when saveImage=false)
+    image_base64: Optional[str] = Field(default=None, description="Base64 编码的原始图片(可选,saveImage=false时不需要)")
+    output_dir: str = Field(description="输出目录路径")
+    # Structure file name, without extension
+    filename: str = Field(description="结构文件名 (不含扩展名)")
+    # Image file name, without extension
+    image_filename: Optional[str] = Field(default=None, description="图片文件名 (不含扩展名)")
+    # Overwrite policy; the description lists overwrite/skip/new, but the value is not validated here
+    overwrite_mode: str = Field(default="overwrite", description="覆盖策略: overwrite/skip/new")
+    # Suffixes are given WITHOUT the .json / .png extension
+    structure_suffix: str = Field(default="_structure", description="结构文件后缀(不含.json扩展名)")
+    image_suffix: str = Field(default="", description="图片文件后缀(不含.png扩展名)")
+    # Line width and RGB line color
+    line_width: int = Field(default=2, description="线条宽度")
+    line_color: List[int] = Field(default=[0, 0, 0], description="线条颜色 RGB")
+
+
+class SaveResponse(BaseModel):
+    """Save response with the paths of the written files."""
+    success: bool
+    # Path of the structure file
+    structure_path: Optional[str] = Field(default=None, description="结构文件路径")
+    # Path of the image file (optional)
+    image_path: Optional[str] = Field(default=None, description="图片文件路径(可选)")
+    message: Optional[str] = None
+
+
+class ErrorResponse(BaseModel):
+    """Error response; `success` defaults to False."""
+    success: bool = False
+    error: str
+    detail: Optional[str] = None
+
+
+class LoadByPathRequest(BaseModel):
+    """Request to load a JSON/image pair by file path."""
+    json_path: str = Field(description="JSON 文件路径")
+    image_path: str = Field(description="图片文件路径")
+    # Optional output directory, used to look up an already-saved annotated structure
+    output_dir: Optional[str] = Field(default=None, description="输出目录路径(可选,用于查找已保存的标注结构)")
+    # Suffixes are given WITHOUT the .json / .png extension
+    structure_suffix: str = Field(default="_structure", description="结构文件后缀(不含.json)")
+    image_suffix: str = Field(default="", description="图片文件后缀(不含.png)")
+
+
+class PreviewApplyRequest(BaseModel):
+    """Request to preview applying a template to a target image."""
+    template_name: str = Field(description="模板名称")
+    target_image_size: ImageSize = Field(description="目标图片尺寸")
+    # Target table bounding box as [x1, y1, x2, y2]
+    target_table_bbox: Optional[List[int]] = Field(default=None, description="目标表格边界框 [x1, y1, x2, y2]")
+
+
+class ApplyTemplateRequest(BaseModel):
+    """Request to apply a template to an existing table structure."""
+    template_name: str = Field(description="模板名称")
+    # The original (pre-template) table structure
+    structure: TableStructure = Field(description="原始表格结构")
+    target_table_bbox: Optional[List[int]] = Field(default=None, description="目标表格边界框")
+
+
+class CreateTemplateRequest(BaseModel):
+    """Request to create a template from a table structure."""
+    name: str = Field(description="模板名称")
+    description: Optional[str] = Field(default=None, description="模板描述")
+    structure: TableStructure = Field(description="表格结构")
+
+
+class TemplateInfo(BaseModel):
+    """模板信息"""
+    name: str
+    description: Optional[str]
+    created_at: str
+    row_count: int
+    col_count: int
+
+
+class TemplateListResponse(BaseModel):
+    """Response listing the available templates."""
+    success: bool
+    templates: List[TemplateInfo]
+    message: Optional[str] = None
+
+
+class TemplateResponse(BaseModel):
+    """Response carrying a single template as an untyped dict."""
+    success: bool
+    data: Dict
+    message: Optional[str] = None
+
+
+class DeleteTemplateRequest(BaseModel):
+    """Request to delete a template by name."""
+    name: str = Field(description="模板名称")
+
+
+class DeleteTemplateResponse(BaseModel):
+    """Response to a template deletion; `message` is required here."""
+    success: bool
+    message: str
+
+
+class CreateTemplateResponse(BaseModel):
+    """Response to a template creation."""
+    success: bool
+    # Per the description: contains name, path, stats and created_at
+    data: Dict = Field(description="包含 name, path, stats, created_at")
+    message: Optional[str] = None
+
+
+class GetTemplateResponse(BaseModel):
+    """Response to fetching one template."""
+    success: bool
+    # Full template information
+    data: Dict = Field(description="模板完整信息")
+    message: Optional[str] = None
+
+
+class TemplateApplyResponse(BaseModel):
+    """Response to previewing or applying a template."""
+    success: bool
+    # The table structure after the template has been applied
+    data: Dict = Field(description="应用后的表格结构")
+    message: Optional[str] = None
+
+
+class HealthResponse(BaseModel):
+    """Health-check response: service status and service name."""
+    status: str = Field(description="服务状态")
+    service: str = Field(description="服务名称")
+
+
+class HomeDirectoryResponse(BaseModel):
+    """Response carrying the user's home directory path."""
+    path: str = Field(description="用户主目录路径")
+

+ 19 - 0
table_line_generator/backend/requirements.txt

@@ -0,0 +1,19 @@
+# FastAPI 后端依赖
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
+python-multipart>=0.0.6
+pydantic>=2.0.0
+
+# 图像处理
+Pillow>=9.0.0
+numpy>=1.20.0
+opencv-python>=4.5.0
+
+# 日志
+loguru>=0.7.0
+
+# HTML 解析
+beautifulsoup4>=4.9.0
+
+# 开发
+python-dotenv>=1.0.0

+ 204 - 0
table_line_generator/backend/services/BATCH_README.md

@@ -0,0 +1,204 @@
+# 批量处理模块
+
+## 概述
+
+批量处理模块用于将首页学习的表格模板应用到多个文件,适用于多页银行流水等场景。
+
+## 架构设计
+
+### 核心思路
+1. **从首页学习模板**:主要学习列边界(竖线)
+2. **应用到所有页**:
+   - **竖线(列边界)**:完全复用
+   - **横线(行分割)**:自适应重新计算
+3. **并行处理**:提升大批量处理效率
+
+### 与旧系统的差异
+
+| 特性 | 旧系统 (batch_processor.py) | 新系统 (batch_service.py) |
+|------|---------------------------|-------------------------|
+| 依赖 | SmartTableLineGenerator | TableAnalyzer |
+| 数据结构 | TableStructure 数据类 | Dict 结构 |
+| 列检测 | ColumnDetector 独立模块 | 内置聚类算法 |
+| 行检测 | AdaptiveRowSplitter | TableAnalyzer.analyze() |
+| 接口 | 命令行工具 | FastAPI REST API |
+
+## API 使用
+
+### 1. 批量处理
+
+**端点**: `POST /api/batch/process`
+
+**请求体**:
+```json
+{
+  "template_structure": {
+    "vertical_lines": [100, 200, 300, 400],
+    "table_bbox": [50, 100, 800, 2000],
+    "total_cols": 5,
+    "mode": "cluster"
+  },
+  "file_pairs": [
+    {
+      "json_path": "/path/to/page_001.json",
+      "image_path": "/path/to/page_001.png"
+    },
+    {
+      "json_path": "/path/to/page_002.json",
+      "image_path": "/path/to/page_002.png"
+    }
+  ],
+  "output_dir": "/path/to/output",
+  "parallel": true,
+  "adjust_rows": true
+}
+```
+
+**响应**:
+```json
+{
+  "success": true,
+  "total": 20,
+  "processed": 18,
+  "failed": 2,
+  "results": [
+    {
+      "success": true,
+      "json_path": "/path/to/page_001.json",
+      "image_path": "/path/to/page_001.png",
+      "structure_path": "/path/to/output/page_001_structure.json",
+      "filename": "page_001.png",
+      "rows": 45,
+      "cols": 5
+    }
+  ],
+  "message": "批量处理完成: 成功 18/20"
+}
+```
+
+### 2. 批量绘图
+
+**端点**: `POST /api/batch/draw`
+
+**请求体**:
+```json
+{
+  "results": [
+    {
+      "success": true,
+      "image_path": "/path/to/page_001.png",
+      "structure_path": "/path/to/output/page_001_structure.json",
+      "filename": "page_001.png"
+    }
+  ],
+  "line_width": 2,
+  "line_color": [0, 0, 0]
+}
+```
+
+## 前端集成示例
+
+```typescript
+import { batchApi } from '@/api'
+
+// 批量处理
+async function processBatch() {
+  try {
+    const response = await batchApi.batchProcess({
+      template_structure: editorStore.structure,
+      file_pairs: templateStore.filePairs.map(pair => ({
+        json_path: pair.json_path,
+        image_path: pair.image_path
+      })),
+      output_dir: templateStore.scanConfig.outputDir,
+      parallel: true,
+      adjust_rows: true
+    })
+    
+    console.log(`处理完成: ${response.processed}/${response.total}`)
+    
+    // 可选:批量绘图
+    if (response.success && response.processed > 0) {
+      const drawResponse = await batchApi.batchDraw({
+        results: response.results.filter(r => r.success),
+        line_width: 2,
+        line_color: [0, 0, 0]
+      })
+      console.log(`绘制完成: ${drawResponse.drawn}/${drawResponse.total}`)
+    }
+  } catch (error) {
+    console.error('批量处理失败:', error)
+  }
+}
+```
+
+## 工作流程
+
+### 典型使用场景:银行流水批量标注
+
+1. **手动标注首页**
+   - 用户在编辑器中标注第一页
+   - 调整行列结构至满意
+   - 保存为模板
+
+2. **选择数据源**
+   - 在"预定义数据源"中选择对应的数据源
+   - 系统自动扫描所有文件对
+
+3. **批量应用模板**
+   - 点击"批量处理"按钮
+   - 系统将首页的列结构应用到所有页
+   - 每页的行结构根据实际 OCR 内容自适应
+
+4. **查看结果**
+   - 所有 `_structure.json` 文件保存到输出目录
+   - 可选绘制表格线图片用于验证
+
+### 参数说明
+
+- **adjust_rows**: 
+  - `true`(推荐):每页自适应调整行分割,适应不同页面的内容高度
+  - `false`:完全复用模板的行结构,适用于行高度完全一致的场景
+  
+- **parallel**: 
+  - `true`(推荐):并行处理,速度快
+  - `false`:串行处理,便于调试
+
+## 性能
+
+- **串行处理**: ~1-2 秒/页
+- **并行处理**: ~0.3-0.5 秒/页(4 线程)
+- **瓶颈**: OCR 数据读取和行分割算法
+
+## 未来扩展
+
+### Phase 3+: 高级功能
+- [ ] WebSocket 实时进度推送
+- [ ] 后台任务队列(Celery)
+- [ ] 批量处理历史记录
+- [ ] 失败重试机制
+- [ ] 处理结果可视化对比
+
+## 文件说明
+
+```
+backend/
+├── services/
+│   └── batch_service.py       # 核心批量处理逻辑
+├── api/
+│   └── batch.py               # REST API 端点
+└── main.py                    # 注册 batch router
+
+frontend/
+└── src/
+    └── api/
+        └── batch.ts           # 前端 API 客户端
+```
+
+## 与旧系统兼容性
+
+旧的 `table_line_generator/batch_processor.py` 暂时保留作为参考,但推荐使用新系统:
+- ✅ 更简洁的实现
+- ✅ 统一的架构
+- ✅ Web API 集成
+- ✅ 前后端一体化

+ 7 - 0
table_line_generator/backend/services/__init__.py

@@ -0,0 +1,7 @@
+"""
+Backend services package
+"""
+
+from .editor_service import EditorService
+
+__all__ = ['EditorService']

+ 400 - 0
table_line_generator/backend/services/batch_service.py

@@ -0,0 +1,400 @@
+"""
+批量处理服务
+基于首页模板应用到多个文件,适用于多页银行流水等场景
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import List, Dict, Optional, Tuple, Any, Callable
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from loguru import logger
+from PIL import Image
+import io
+
+# 添加 ocr_platform 根目录到 Python 路径(用于导入 core 和 ocr_utils)
+_file_path = Path(__file__).resolve()
+ocr_platform_root = _file_path.parents[2]  # batch_service.py -> services -> backend -> table_line_generator -> ocr_platform
+if str(ocr_platform_root) not in sys.path:
+    sys.path.insert(0, str(ocr_platform_root))
+
+from table_line_generator.core.table_analyzer import TableAnalyzer
+from table_line_generator.core.ocr_parser import OcrParser
+from table_line_generator.core.drawing_service import DrawingService
+from table_line_generator.backend.services.template_service import TemplateService
+
+
+class BatchProcessor:
+    """
+    批量表格处理器
+    
+    工作流程:
+    1. 从首页学习模板结构(主要是竖线/列边界)
+    2. 将模板应用到所有页(竖线复用,横线自适应)
+    3. 支持并行处理提升效率
+    """
+    
+    def __init__(self, max_workers: int = 4):
+        """
+        Initialize the batch processor.
+        
+        Args:
+            max_workers: Maximum number of threads used for parallel processing.
+        """
+        self.max_workers = max_workers
+    
+    def learn_template_from_structure(
+        self,
+        first_page_structure: Dict
+    ) -> Dict:
+        """
+        从首页结构中学习模板
+        
+        Args:
+            first_page_structure: 首页的表格结构
+            
+        Returns:
+            模板字典,包含可复用的部分(主要是列边界)
+        """
+        template = {
+            'vertical_lines': first_page_structure.get('vertical_lines', []).copy(),
+            'table_bbox': first_page_structure.get('table_bbox'),
+            'col_widths': first_page_structure.get('col_widths'),
+            'total_cols': first_page_structure.get('total_cols'),
+            'mode': first_page_structure.get('mode', 'hybrid')
+        }
+        
+        logger.info(f"从首页学习模板: {len(template['vertical_lines'])} 条竖线, {template['total_cols']} 列")
+        
+        return template
+    
+    def apply_template_to_structure(
+        self,
+        template: Dict,
+        target_ocr_data: Dict,
+        adjust_rows: bool = True,
+        y_tolerance: int = 5,
+        min_row_height: int = 20
+    ) -> Dict:
+        """
+        将模板应用到目标页面的 OCR 数据
+        
+        Args:
+            template: 从首页学习的模板
+            target_ocr_data: 目标页面的 OCR 数据
+            adjust_rows: 是否自适应调整行分割
+            y_tolerance: Y轴聚类容差
+            min_row_height: 最小行高
+            
+        Returns:
+            应用模板后的结构
+        """
+        # 创建分析器
+        analyzer = TableAnalyzer(None, target_ocr_data)
+        
+        if adjust_rows:
+            # 重新分析行结构(自适应)
+            analyzed = analyzer.analyze(
+                y_tolerance=y_tolerance,
+                min_row_height=min_row_height,
+                method=template.get('mode', 'auto')
+            )
+            
+            # 复用模板的列信息
+            new_structure = {
+                'horizontal_lines': analyzed['horizontal_lines'],
+                'vertical_lines': template['vertical_lines'].copy(),
+                'table_bbox': template.get('table_bbox') or analyzed.get('table_bbox'),
+                'row_height': analyzed.get('row_height'),
+                'col_widths': template.get('col_widths'),
+                'total_rows': analyzed.get('total_rows'),
+                'total_cols': template.get('total_cols'),
+                'mode': template.get('mode'),
+                'modified_h_lines': [],
+                'modified_v_lines': [],
+                'image_rotation_angle': target_ocr_data.get('image_rotation_angle', 0.0),
+                'skew_angle': target_ocr_data.get('skew_angle', 0.0),
+                'is_skew_corrected': target_ocr_data.get('is_skew_corrected', False)
+            }
+        else:
+            # 完全复用模板(包括横线)
+            new_structure = template.copy()
+            new_structure['image_rotation_angle'] = target_ocr_data.get('image_rotation_angle', 0.0)
+            new_structure['skew_angle'] = target_ocr_data.get('skew_angle', 0.0)
+            new_structure['is_skew_corrected'] = target_ocr_data.get('is_skew_corrected', False)
+        
+        return new_structure
+    
+    def process_batch_from_data_source(
+        self,
+        template_name: str,
+        file_pairs: List[Dict],
+        output_dir: str,
+        parallel: bool = True,
+        adjust_rows: bool = True,
+        structure_suffix: str = "_structure.json",
+        image_suffix: str = "_with_lines.png",
+        progress_callback: Optional[Callable[[int, int], None]] = None
+    ) -> Dict:
+        """
+        批量处理数据源中的文件
+        
+        Args:
+            template_name: 模板名称(从 TemplateService 加载)
+            file_pairs: 文件对列表 [{'json_path': ..., 'image_path': ...}, ...]
+            output_dir: 输出目录
+            parallel: 是否并行处理
+            adjust_rows: 是否自适应调整行分割
+            structure_suffix: 结构文件后缀
+            image_suffix: 输出图片后缀
+            progress_callback: 进度回调 callback(index, total)
+            
+        Returns:
+            处理结果摘要
+        """
+        total = len(file_pairs)
+        results = []
+        
+        # 加载模板
+        template_service = TemplateService()
+        logger.info(f"开始批量处理: {total} 个文件, 使用模板: {template_name}, 并行={parallel}")
+        
+        if parallel and total > 1:
+            # 并行处理
+            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                futures = {
+                    executor.submit(
+                        self._process_single_file,
+                        pair, template_name, output_dir, adjust_rows, structure_suffix
+                    ): idx
+                    for idx, pair in enumerate(file_pairs)
+                }
+                
+                for future in as_completed(futures):
+                    idx = futures[future]
+                    try:
+                        result = future.result()
+                        results.append(result)
+                        
+                        if progress_callback:
+                            progress_callback(idx, total)
+                        
+                        status = "✅" if result['success'] else "❌"
+                        logger.info(f"{status} [{idx+1}/{total}] {result.get('filename', 'unknown')}")
+                        
+                    except Exception as e:
+                        logger.error(f"❌ 处理失败 [{idx+1}/{total}]: {e}")
+                        results.append({
+                            'success': False,
+                            'error': str(e),
+                            'index': idx
+                        })
+        else:
+            # 串行处理
+            for idx, pair in enumerate(file_pairs):
+                try:
+                    result = self._process_single_file(
+                        pair, template_name, output_dir, adjust_rows, structure_suffix
+                    )
+                    results.append(result)
+                    
+                    if progress_callback:
+                        progress_callback(idx, total)
+                    
+                    status = "✅" if result['success'] else "❌"
+                    logger.info(f"{status} [{idx+1}/{total}] {result.get('filename', 'unknown')}")
+                    
+                except Exception as e:
+                    logger.error(f"❌ 处理失败 [{idx+1}/{total}]: {e}")
+                    results.append({
+                        'success': False,
+                        'error': str(e),
+                        'index': idx
+                    })
+        
+        # 统计结果
+        success_count = sum(1 for r in results if r.get('success'))
+        failed_count = total - success_count
+        
+        summary = {
+            'total': total,
+            'success': success_count,
+            'failed': failed_count,
+            'results': results
+        }
+        
+        logger.info(f"📊 批量处理完成: 成功 {success_count}/{total}, 失败 {failed_count}")
+        
+        return summary
+    
+    def _process_single_file(
+        self,
+        file_pair: Dict,
+        template_name: str,
+        output_dir: str,
+        adjust_rows: bool,
+        structure_suffix: str = "_structure.json"
+    ) -> Dict:
+        """
+        处理单个文件
+        
+        Args:
+            file_pair: 文件对 {'json_path': ..., 'image_path': ...}
+            template_name: 模板名称(从 TemplateService 加载)
+            output_dir: 输出目录
+            adjust_rows: 是否调整行
+            structure_suffix: 结构文件后缀
+            
+        Returns:
+            处理结果
+        """
+        json_path = Path(file_pair['json_path'])
+        image_path = Path(file_pair['image_path'])
+        
+        try:
+            # 1. 读取 OCR 数据
+            with open(json_path, 'r', encoding='utf-8') as f:
+                ocr_result = json.load(f)
+            
+            # 2. 解析 OCR 数据(获取目标页面的 table_bbox 和 ocr_data)
+            target_table_bbox, ocr_data = OcrParser.parse(ocr_result)
+            target_image_size = ocr_data.get('image_size', {'width': 1, 'height': 1})
+            
+            # 3. 使用 TemplateService.preview_apply() 应用模板到目标页面
+            #    这会自动处理坐标映射,适配不同尺寸的图片
+            template_service = TemplateService()
+            applied_template = template_service.preview_apply(
+                template_name=template_name,
+                target_image_size=target_image_size,
+                target_table_bbox=target_table_bbox,
+                mode='relative'  # 使用相对坐标映射,适应不同尺寸图片
+            )
+            
+            # 4. 构建最终结构(合并应用的模板和目标页面的元数据)
+            if adjust_rows:
+                # 如果启用自适应行,则自动分析目标页面的行结构
+                analyzer = TableAnalyzer(None, ocr_data)
+                analyzed = analyzer.analyze()
+                
+                # 关键:使用模板的竖线(已通过 preview_apply 映射),结合目标的横线
+                new_structure = {
+                    'horizontal_lines': analyzed['horizontal_lines'],  # 自适应调整
+                    'vertical_lines': applied_template['vertical_lines'],  # 来自模板,已映射
+                    'table_bbox': applied_template['table_bbox'],  # 目标页面的 bbox
+                    'row_height': analyzed.get('row_height'),
+                    'col_widths': applied_template.get('col_widths'),
+                    'total_rows': analyzed.get('total_rows'),
+                    'total_cols': applied_template.get('total_cols'),
+                    'mode': applied_template.get('mode', 'hybrid'),
+                    'modified_h_lines': [],
+                    'modified_v_lines': [],
+                    # 各页使用自己的旋转角度
+                    'image_rotation_angle': ocr_data.get('image_rotation_angle', 0.0),
+                    'skew_angle': ocr_data.get('skew_angle', 0.0),
+                    'is_skew_corrected': ocr_data.get('is_skew_corrected', False)
+                }
+            else:
+                # 完全复用应用的模板(包括竖线和横线)
+                new_structure = applied_template.copy()
+                new_structure['image_rotation_angle'] = ocr_data.get('image_rotation_angle', 0.0)
+                new_structure['skew_angle'] = ocr_data.get('skew_angle', 0.0)
+                new_structure['is_skew_corrected'] = ocr_data.get('is_skew_corrected', False)
+            
+            # 5. 保存结构文件
+            output_path = Path(output_dir)
+            output_path.mkdir(parents=True, exist_ok=True)
+            
+            structure_filename = f"{image_path.stem}{structure_suffix}"
+            structure_path = output_path / structure_filename
+            
+            # 准备保存的数据(移除不需要的字段)
+            structure_to_save = new_structure.copy()
+            for key in ['rows', 'columns']:
+                structure_to_save.pop(key, None)
+            
+            with open(structure_path, 'w', encoding='utf-8') as f:
+                json.dump(structure_to_save, f, ensure_ascii=False, indent=2)
+            
+            return {
+                'success': True,
+                'json_path': str(json_path),
+                'image_path': str(image_path),
+                'structure_path': str(structure_path),
+                'filename': image_path.name,
+                'rows': new_structure.get('total_rows', 0),
+                'cols': new_structure.get('total_cols', 0)
+            }
+            
+        except Exception as e:
+            logger.exception(f"处理文件失败: {json_path}")
+            return {
+                'success': False,
+                'json_path': str(json_path),
+                'image_path': str(image_path),
+                'error': str(e),
+                'filename': image_path.name if image_path else 'unknown'
+            }
+    
+    def draw_batch_images(
+        self,
+        results: List[Dict],
+        line_width: int = 2,
+        line_color: Tuple[int, int, int] = (0, 0, 0)
+    ) -> List[Dict]:
+        """
+        批量绘制表格线到图片上
+        
+        Args:
+            results: process_batch_from_data_source 的返回结果中的 results 列表
+            line_width: 线条宽度
+            line_color: 线条颜色 RGB
+            
+        Returns:
+            绘制结果列表
+        """
+        draw_results = []
+        
+        for result in results:
+            if not result.get('success'):
+                continue
+            
+            try:
+                image_path = Path(result['image_path'])
+                structure_path = Path(result['structure_path'])
+                
+                # 读取图片
+                image = Image.open(image_path)
+                if image.mode != 'RGB':
+                    image = image.convert('RGB')
+                
+                # 读取结构
+                with open(structure_path, 'r', encoding='utf-8') as f:
+                    structure = json.load(f)
+                
+                # 绘制线条
+                image_with_lines = DrawingService.draw_clean_lines(
+                    image, structure, line_width=line_width, line_color=line_color
+                )
+                
+                # 保存
+                output_path = structure_path.parent / f"{image_path.stem}.png"
+                image_with_lines.save(str(output_path), 'PNG')
+                
+                draw_results.append({
+                    'success': True,
+                    'image_path': str(output_path),
+                    'filename': image_path.name
+                })
+                
+            except Exception as e:
+                logger.error(f"绘制图片失败 {result.get('filename')}: {e}")
+                draw_results.append({
+                    'success': False,
+                    'error': str(e),
+                    'filename': result.get('filename')
+                })
+        
+        success_count = sum(1 for r in draw_results if r.get('success'))
+        logger.info(f"🖼️ 绘制完成: {success_count}/{len(results)} 张图片")
+        
+        return draw_results

+ 271 - 0
table_line_generator/backend/services/editor_service.py

@@ -0,0 +1,271 @@
+"""
+编辑器业务逻辑服务
+"""
+
+import base64
+import json
+import io
+from pathlib import Path
+from typing import Dict, Tuple, Optional, Any, cast
+from PIL import Image
+from loguru import logger
+
+import sys
+from pathlib import Path
+
+# 添加 ocr_platform 根目录到 Python 路径(用于导入 core 和 ocr_utils)
+_file_path = Path(__file__).resolve()
+ocr_platform_root = _file_path.parents[2]  # editor_service.py -> services -> backend -> table_line_generator -> ocr_platform
+if str(ocr_platform_root) not in sys.path:
+    sys.path.insert(0, str(ocr_platform_root))
+
+from table_line_generator.core.ocr_parser import OcrParser
+from table_line_generator.core.table_analyzer import TableAnalyzer
+from table_line_generator.core.drawing_service import DrawingService
+
+
+class EditorService:
+    """Business logic behind the table-line editor.
+
+    Covers three steps of the editing workflow: turning an uploaded OCR
+    JSON / image pair into an editable table structure, re-running the
+    structure analysis with new parameters, and writing the edited structure
+    (plus an optional rendered image) to disk.
+    """
+
+    # Size limit handed to TableAnalyzer.resize_image_if_needed
+    # (presumably the maximum side length in pixels -- confirm in that helper).
+    MAX_IMAGE_SIZE = 4096
+
+    @staticmethod
+    def _extract_suggested_filename(json_path: str) -> str:
+        """Return the file name of *json_path* without its final extension.
+
+        Example: "/path/to/施博深_page_001.json" -> "施博深_page_001"
+
+        Args:
+            json_path: Path of the JSON file.
+
+        Returns:
+            The file name with the last suffix removed.
+        """
+        return Path(json_path).stem
+
+    @classmethod
+    def process_upload(
+        cls,
+        json_content: bytes,
+        image_content: bytes,
+        json_path: Optional[str] = None,
+        annotated_structure: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        """Process an uploaded OCR JSON file together with its image.
+
+        Args:
+            json_content: Raw bytes of the OCR JSON file (UTF-8 encoded).
+            image_content: Raw bytes of the image file.
+            json_path: Path of the JSON file; when given, its stem is returned
+                as ``suggested_filename``.
+            annotated_structure: Previously saved annotation; when truthy it is
+                used as the structure and the table analysis is skipped.
+
+        Returns:
+            A dict with the keys ``image_base64``, ``structure``,
+            ``image_size`` ({width, height}), ``scale_factor`` and
+            ``ocr_data``, plus ``suggested_filename`` when *json_path* is set.
+
+        Raises:
+            ValueError: If the JSON cannot be decoded or parsed, the image
+                cannot be loaded, or the OCR data cannot be parsed.
+        """
+        # Decode and parse under one guard: invalid UTF-8 is reported with the
+        # same message as malformed JSON instead of a raw UnicodeDecodeError.
+        try:
+            ocr_result = json.loads(json_content.decode('utf-8'))
+        except (UnicodeDecodeError, json.JSONDecodeError) as e:
+            raise ValueError(f"JSON 解析失败: {e}") from e
+
+        # Load the image and normalise it to RGB.
+        try:
+            image = Image.open(io.BytesIO(image_content))
+            if image.mode != 'RGB':
+                image = image.convert('RGB')
+        except Exception as e:
+            raise ValueError(f"图片加载失败: {e}") from e
+
+        original_size = image.size
+
+        # Let the analyzer shrink the image if it exceeds the limit.
+        image, scale_factor = TableAnalyzer.resize_image_if_needed(
+            image, cls.MAX_IMAGE_SIZE
+        )
+
+        if scale_factor < 1.0:
+            logger.info(f"图片已缩放: {original_size} -> {image.size}, scale={scale_factor:.3f}")
+
+        # Parse the OCR payload; only the OCR data half of the result is used.
+        try:
+            _table_bbox, ocr_data = OcrParser.parse(ocr_result)
+        except Exception as e:
+            raise ValueError(f"OCR 数据解析失败: {e}") from e
+
+        if annotated_structure:
+            # A saved annotation takes precedence over a fresh analysis.
+            logger.info("使用保存的标注结果")
+            structure = annotated_structure
+        else:
+            analyzer = TableAnalyzer(image, ocr_data)
+            structure = analyzer.analyze()
+
+        result = {
+            'image_base64': cls._image_to_base64(image),
+            'structure': structure,
+            'image_size': {
+                'width': image.size[0],
+                'height': image.size[1]
+            },
+            'scale_factor': scale_factor,
+            'ocr_data': ocr_data
+        }
+
+        if json_path:
+            result['suggested_filename'] = cls._extract_suggested_filename(json_path)
+
+        return result
+
+    @classmethod
+    def analyze_structure(
+        cls,
+        ocr_data: Dict,
+        params: Dict
+    ) -> Dict:
+        """Re-run the table structure analysis on existing OCR data.
+
+        Args:
+            ocr_data: OCR data to analyse.
+            params: Analysis parameters. Recognised keys and their defaults:
+                ``y_tolerance`` (5), ``x_tolerance`` (10),
+                ``min_row_height`` (20) and ``method`` ('auto').
+
+        Returns:
+            The structure returned by ``TableAnalyzer.analyze_structure_only``.
+        """
+        return TableAnalyzer.analyze_structure_only(
+            ocr_data,
+            y_tolerance=params.get('y_tolerance', 5),
+            x_tolerance=params.get('x_tolerance', 10),
+            min_row_height=params.get('min_row_height', 20),
+            method=params.get('method', 'auto')
+        )
+
+    @classmethod
+    def save_result(
+        cls,
+        structure: Dict,
+        image_base64: Optional[str] = None,
+        output_dir: str = './output',
+        filename: str = 'structure',
+        image_filename: Optional[str] = None,
+        overwrite_mode: str = 'overwrite',
+        structure_suffix: str = '_structure',
+        image_suffix: str = '',
+        line_width: int = 2,
+        line_color: Tuple[int, int, int] = (0, 0, 0)
+    ) -> Dict[str, Optional[str]]:
+        """Write the structure JSON and, optionally, an image with table lines.
+
+        Args:
+            structure: Table structure to save.
+            image_base64: Base64 encoded image (optional); when given, the
+                table lines are drawn on it and the result is saved as PNG.
+            output_dir: Output directory; created if missing.
+            filename: Structure file name without extension.
+            image_filename: Image file name without extension; defaults to
+                *filename*.
+            overwrite_mode: What to do when the target exists: 'overwrite',
+                'skip' (only checked for the structure file) or 'new'
+                (insert a timestamp into the file name).
+            structure_suffix: Structure file suffix; '.json' is appended when
+                missing.
+            image_suffix: Image file suffix; '.png' is appended when missing.
+            line_width: Line width passed to the drawing service.
+            line_color: Line colour passed to the drawing service.
+
+        Returns:
+            A dict with ``structure_path`` and ``image_path`` (``None`` when no
+            image was written).
+        """
+        from datetime import datetime
+
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        # One timestamp per call so that, in 'new' mode, a structure file and
+        # an image written together carry the same stamp.
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+
+        # Make sure the structure suffix ends with .json.
+        if not structure_suffix.endswith('.json'):
+            structure_suffix = structure_suffix + '.json'
+        structure_path = output_path / f"{filename}{structure_suffix}"
+
+        # Apply the overwrite policy to the structure file.
+        if structure_path.exists() and overwrite_mode != 'overwrite':
+            if overwrite_mode == 'skip':
+                logger.info(f"文件已存在,跳过保存: {structure_path}")
+                return {
+                    'structure_path': str(structure_path),
+                    'image_path': None
+                }
+            elif overwrite_mode == 'new':
+                # Strip only the trailing extension; str.replace would also
+                # remove a '.json' occurring earlier in the suffix.
+                base_suffix = structure_suffix[:-len('.json')]
+                structure_path = output_path / f"{filename}_{timestamp}{base_suffix}.json"
+
+        # Work on a shallow copy and force the modified-line collections into
+        # lists so that json.dump can serialise them.
+        structure_to_save = dict(structure)
+        for key in ('modified_h_lines', 'modified_v_lines'):
+            if key in structure_to_save:
+                structure_to_save[key] = list(structure_to_save[key])
+
+        # Drop the fields that are not persisted.
+        for key in ('rows', 'columns'):
+            structure_to_save.pop(key, None)
+
+        with open(structure_path, 'w', encoding='utf-8') as f:
+            json.dump(structure_to_save, f, ensure_ascii=False, indent=2)
+
+        logger.info(f"保存结构文件: {structure_path}")
+
+        # Save the image (optional).
+        image_path = None
+        if image_base64:
+            if image_filename is None:
+                image_filename = filename
+
+            # Make sure the image suffix ends with .png.
+            if not image_suffix.endswith('.png'):
+                image_suffix = image_suffix + '.png'
+            image_path = output_path / f"{image_filename}{image_suffix}"
+
+            # Only the 'new' policy is applied to the image file.
+            if image_path.exists() and overwrite_mode == 'new':
+                base_suffix = image_suffix[:-len('.png')]
+                image_path = output_path / f"{image_filename}_{timestamp}{base_suffix}.png"
+
+            # Decode the image and draw the table lines from the original
+            # (unfiltered) structure.
+            image = cls._base64_to_image(image_base64)
+            image_with_lines = DrawingService.draw_clean_lines(
+                image, structure, line_width=line_width, line_color=cast(Tuple[int, int, int], line_color)
+            )
+
+            image_with_lines.save(str(image_path), 'PNG')
+            logger.info(f"保存图片文件: {image_path}")
+
+        return {
+            'structure_path': str(structure_path),
+            'image_path': str(image_path) if image_path else None
+        }
+
+    @staticmethod
+    def _image_to_base64(image: Image.Image) -> str:
+        """Encode a PIL image as a base64 string of its PNG bytes."""
+        buffer = io.BytesIO()
+        image.save(buffer, format='PNG')
+        return base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+    @staticmethod
+    def _base64_to_image(base64_str: str) -> Image.Image:
+        """Decode a base64 string (optionally a data URL) into a PIL image."""
+        # Drop a possible data URL prefix: keep the part after the first comma.
+        if ',' in base64_str:
+            base64_str = base64_str.split(',')[1]
+
+        image_data = base64.b64decode(base64_str)
+        return Image.open(io.BytesIO(image_data))

+ 313 - 0
table_line_generator/backend/services/template_service.py

@@ -0,0 +1,313 @@
+"""
+模板服务
+管理表格线条模板的创建、应用、预览和删除
+"""
+import json
+import os
+from pathlib import Path
+from typing import List, Dict, Optional, Tuple
+from datetime import datetime
+
+
+class TemplateService:
+    """Service for table-line templates stored as JSON files.
+
+    A template records the horizontal/vertical line positions of a table in
+    absolute pixels and as fractions (relative to the table bounding box when
+    one is known, otherwise to the whole image), so that the same layout can
+    be applied to other pages.
+    """
+
+    # Template storage directory: "templates" next to the services package.
+    _file_path = Path(__file__).resolve()
+    TEMPLATE_DIR = _file_path.parent.parent / "templates"
+    TEMPLATE_SUFFIX = ".template.json"
+
+    def __init__(self):
+        """Initialise the service and make sure the template directory exists."""
+        self.TEMPLATE_DIR.mkdir(parents=True, exist_ok=True)
+
+    def _get_template_path(self, name: str) -> Path:
+        """Return the file path used to store the template called *name*.
+
+        Only alphanumeric characters and ``. _ -`` plus spaces are kept, so the
+        resulting file name cannot contain path separators.
+        """
+        safe_name = "".join(c for c in name if c.isalnum() or c in "._- ")
+        return self.TEMPLATE_DIR / f"{safe_name}{self.TEMPLATE_SUFFIX}"
+
+    def create_template(
+        self,
+        name: str,
+        structure: Dict,
+        image_size: Dict,
+        description: str = ""
+    ) -> Dict:
+        """Create a template from the current table structure.
+
+        Args:
+            name: User-defined template name.
+            structure: Current table structure; the keys ``horizontal_lines``,
+                ``vertical_lines`` and ``table_bbox`` are read.
+            image_size: Image size ``{width, height}``.
+            description: Template description.
+
+        Returns:
+            Summary of the created template: ``name``, ``path``, ``stats`` and
+            ``created_at``.
+
+        Raises:
+            ValueError: If the name is empty (also after sanitizing), the
+                template already exists, the structure has no lines, or the
+                image size is not positive.
+        """
+        if not name or not name.strip():
+            raise ValueError("模板名称不能为空")
+
+        name = name.strip()
+        template_path = self._get_template_path(name)
+
+        # A name made only of rejected characters sanitizes to "" and would
+        # map every such name onto the same bare ".template.json" file.
+        if template_path.name == self.TEMPLATE_SUFFIX:
+            raise ValueError("模板名称不能为空")
+
+        if template_path.exists():
+            raise ValueError(f"模板 '{name}' 已存在")
+
+        # Extract the line information.
+        horizontal_lines = structure.get("horizontal_lines", [])
+        vertical_lines = structure.get("vertical_lines", [])
+        table_bbox = structure.get("table_bbox")
+
+        if not horizontal_lines and not vertical_lines:
+            raise ValueError("结构中没有线条信息")
+
+        img_width = image_size.get("width", 1)
+        img_height = image_size.get("height", 1)
+        # Both values are used as divisors below; fail with a clear error
+        # instead of a ZeroDivisionError.
+        if img_width <= 0 or img_height <= 0:
+            raise ValueError(f"图片尺寸无效: {image_size}")
+
+        if table_bbox and len(table_bbox) == 4:
+            bbox_x1, bbox_y1, bbox_x2, bbox_y2 = table_bbox
+            bbox_width = bbox_x2 - bbox_x1
+            bbox_height = bbox_y2 - bbox_y1
+
+            # Line positions as fractions of the bbox extent, so they can be
+            # mapped straight onto a target bbox when the template is applied.
+            # A bbox without extent on an axis yields no relative lines there.
+            rel_horizontal_lines = (
+                [(y - bbox_y1) / bbox_height for y in horizontal_lines]
+                if bbox_height > 0 else []
+            )
+            rel_vertical_lines = (
+                [(x - bbox_x1) / bbox_width for x in vertical_lines]
+                if bbox_width > 0 else []
+            )
+            # The bbox itself is stored relative to the image size.
+            rel_table_bbox = [
+                bbox_x1 / img_width,
+                bbox_y1 / img_height,
+                bbox_x2 / img_width,
+                bbox_y2 / img_height,
+            ]
+        else:
+            # Without a table_bbox everything is relative to the whole image.
+            rel_horizontal_lines = [y / img_height for y in horizontal_lines]
+            rel_vertical_lines = [x / img_width for x in vertical_lines]
+            rel_table_bbox = None
+
+        template = {
+            "name": name,
+            "description": description,
+            "created_at": datetime.now().isoformat(),
+            "source_image_size": image_size,
+            # Absolute coordinates as received.
+            "horizontal_lines": horizontal_lines,
+            "vertical_lines": vertical_lines,
+            "table_bbox": table_bbox,
+            # Fractional coordinates used to adapt to other image sizes.
+            "relative": {
+                "horizontal_lines": rel_horizontal_lines,
+                "vertical_lines": rel_vertical_lines,
+                "table_bbox": rel_table_bbox
+            },
+            # N lines delimit N - 1 rows/columns.
+            "stats": {
+                "row_count": len(horizontal_lines) - 1 if len(horizontal_lines) > 1 else 0,
+                "col_count": len(vertical_lines) - 1 if len(vertical_lines) > 1 else 0,
+            }
+        }
+
+        with open(template_path, 'w', encoding='utf-8') as f:
+            json.dump(template, f, ensure_ascii=False, indent=2)
+
+        return {
+            "name": name,
+            "path": str(template_path),
+            "stats": template["stats"],
+            "created_at": template["created_at"]
+        }
+
+    def list_templates(self) -> List[Dict]:
+        """List all stored templates, newest first.
+
+        Files that cannot be read or parsed are reported on stdout and skipped.
+
+        Returns:
+            A list of ``{name, description, created_at, stats,
+            source_image_size}`` dicts sorted by ``created_at`` descending.
+        """
+        templates = []
+
+        for file_path in self.TEMPLATE_DIR.glob(f"*{self.TEMPLATE_SUFFIX}"):
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    template = json.load(f)
+                # Path.stem would only drop ".json" and leave "x.template";
+                # strip the whole template suffix for the fallback name.
+                fallback_name = file_path.name[:-len(self.TEMPLATE_SUFFIX)]
+                templates.append({
+                    "name": template.get("name", fallback_name),
+                    "description": template.get("description", ""),
+                    "created_at": template.get("created_at", ""),
+                    "stats": template.get("stats", {}),
+                    "source_image_size": template.get("source_image_size", {})
+                })
+            except Exception as e:
+                print(f"读取模板 {file_path} 失败: {e}")
+                continue
+
+        # Newest first; a missing or null timestamp sorts as the empty string.
+        templates.sort(key=lambda x: x.get("created_at") or "", reverse=True)
+        return templates
+
+    def get_template(self, name: str) -> Optional[Dict]:
+        """Load a template by name.
+
+        Args:
+            name: Template name.
+
+        Returns:
+            The parsed template, or ``None`` when no such file exists.
+        """
+        template_path = self._get_template_path(name)
+        if not template_path.exists():
+            return None
+
+        with open(template_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    def delete_template(self, name: str) -> bool:
+        """Delete a template by name.
+
+        Args:
+            name: Template name.
+
+        Returns:
+            ``True`` when a file was removed, ``False`` when it did not exist.
+        """
+        # Attempt the removal directly so a file that vanishes between a
+        # check and the unlink is reported as "not deleted" instead of raising.
+        try:
+            self._get_template_path(name).unlink()
+        except FileNotFoundError:
+            return False
+        return True
+
+    def preview_apply(
+        self,
+        template_name: str,
+        target_image_size: Dict,
+        target_table_bbox: Optional[List] = None,
+        mode: str = "relative"
+    ) -> Dict:
+        """Compute the structure a template would produce, without saving.
+
+        Args:
+            template_name: Template name.
+            target_image_size: Target image size ``{width, height}``.
+            target_table_bbox: Table bounding box ``[x1, y1, x2, y2]`` of the
+                target page. When given, the template lines are mapped into
+                this region; otherwise the template's own bbox scaled to the
+                target image is used (or the whole image if it has none).
+            mode: ``"absolute"`` uses the stored pixel coordinates unchanged;
+                any other value applies the relative coordinates.
+
+        Returns:
+            A structure dict with ``horizontal_lines``, ``vertical_lines``,
+            ``table_bbox``, ``total_rows``, ``total_cols``, empty
+            ``modified_h_lines``/``modified_v_lines``, ``applied_template``
+            and ``apply_mode``.
+
+        Raises:
+            ValueError: If the template does not exist.
+        """
+        template = self.get_template(template_name)
+        if not template:
+            raise ValueError(f"模板 '{template_name}' 不存在")
+
+        target_width = target_image_size.get("width", 1)
+        target_height = target_image_size.get("height", 1)
+
+        # Fractional coordinates stored with the template.
+        relative = template.get("relative", {})
+        rel_h_lines = relative.get("horizontal_lines", [])
+        rel_v_lines = relative.get("vertical_lines", [])
+        rel_bbox = relative.get("table_bbox")
+
+        if mode == "absolute":
+            # Copy so the alignment below does not touch the loaded template.
+            horizontal_lines = list(template.get("horizontal_lines", []))
+            vertical_lines = list(template.get("vertical_lines", []))
+            table_bbox = template.get("table_bbox")
+        else:
+            if target_table_bbox and len(target_table_bbox) == 4:
+                # Use the bbox of the target page.
+                bbox_x1, bbox_y1, bbox_x2, bbox_y2 = target_table_bbox
+                table_bbox = list(target_table_bbox)
+            elif rel_bbox and len(rel_bbox) == 4:
+                # Scale the template bbox to the target image. round() rather
+                # than int(): products such as 155/1654*1654 can land just
+                # below the integer and would otherwise lose a pixel.
+                bbox_x1 = round(rel_bbox[0] * target_width)
+                bbox_y1 = round(rel_bbox[1] * target_height)
+                bbox_x2 = round(rel_bbox[2] * target_width)
+                bbox_y2 = round(rel_bbox[3] * target_height)
+                table_bbox = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
+            else:
+                # No bbox information: use the whole image.
+                bbox_x1, bbox_y1 = 0, 0
+                bbox_x2, bbox_y2 = target_width, target_height
+                table_bbox = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
+
+            bbox_width = bbox_x2 - bbox_x1
+            bbox_height = bbox_y2 - bbox_y1
+
+            # Map the fractional positions into the target bbox.
+            horizontal_lines = [
+                round(bbox_y1 + y * bbox_height)
+                for y in rel_h_lines
+            ]
+            vertical_lines = [
+                round(bbox_x1 + x * bbox_width)
+                for x in rel_v_lines
+            ]
+
+        # Force alignment: the first and last line coincide with the bbox edges.
+        if table_bbox and horizontal_lines:
+            horizontal_lines[0] = table_bbox[1]  # first horizontal line = top
+            if len(horizontal_lines) > 1:
+                horizontal_lines[-1] = table_bbox[3]  # last horizontal line = bottom
+
+        if table_bbox and vertical_lines:
+            vertical_lines[0] = table_bbox[0]  # first vertical line = left
+            if len(vertical_lines) > 1:
+                vertical_lines[-1] = table_bbox[2]  # last vertical line = right
+
+        # N lines delimit N - 1 rows/columns.
+        total_rows = len(horizontal_lines) - 1 if len(horizontal_lines) > 1 else 0
+        total_cols = len(vertical_lines) - 1 if len(vertical_lines) > 1 else 0
+
+        return {
+            "horizontal_lines": horizontal_lines,
+            "vertical_lines": vertical_lines,
+            "table_bbox": table_bbox,
+            "total_rows": total_rows,
+            "total_cols": total_cols,
+            "modified_h_lines": [],
+            "modified_v_lines": [],
+            "applied_template": template_name,
+            "apply_mode": mode
+        }
+
+    def apply_template(
+        self,
+        template_name: str,
+        target_image_size: Dict,
+        target_table_bbox: Optional[List] = None,
+        mode: str = "relative"
+    ) -> Dict:
+        """Apply a template and return the structure; saving is up to the caller.
+
+        Same result as :meth:`preview_apply`, with ``confirmed`` set to ``True``.
+        """
+        structure = self.preview_apply(template_name, target_image_size, target_table_bbox, mode)
+        structure["confirmed"] = True
+        return structure
+
+
+# 单例
+_template_service: Optional[TemplateService] = None
+
+def get_template_service() -> TemplateService:
+    """Return the shared TemplateService, creating it on first use."""
+    global _template_service
+    if _template_service is not None:
+        return _template_service
+    _template_service = TemplateService()
+    return _template_service

+ 135 - 0
table_line_generator/backend/templates/B用户_扫描流水.template.json

@@ -0,0 +1,135 @@
+{
+  "name": "B用户_扫描流水",
+  "description": "",
+  "created_at": "2025-12-08T20:16:13.933527",
+  "source_image_size": {
+    "width": 1654,
+    "height": 2339
+  },
+  "horizontal_lines": [
+    449,
+    490,
+    532,
+    573,
+    615,
+    657,
+    693,
+    748,
+    788,
+    829,
+    871,
+    917,
+    964,
+    1003,
+    1038,
+    1094,
+    1134,
+    1177,
+    1220,
+    1261,
+    1303,
+    1340,
+    1418,
+    1458,
+    1500,
+    1535,
+    1590,
+    1624,
+    1676,
+    1727,
+    1767,
+    1810,
+    1852,
+    1892,
+    1931,
+    2007,
+    2040,
+    2106,
+    2147,
+    2175
+  ],
+  "vertical_lines": [
+    155,
+    267,
+    376,
+    490,
+    599,
+    715,
+    881,
+    977,
+    1091,
+    1509
+  ],
+  "table_bbox": [
+    155,
+    339,
+    1509,
+    2177
+  ],
+  "relative": {
+    "horizontal_lines": [
+      0.05984766050054407,
+      0.08215451577801959,
+      0.10500544069640914,
+      0.12731229597388466,
+      0.1501632208922742,
+      0.17301414581066377,
+      0.1926006528835691,
+      0.22252448313384113,
+      0.2442872687704026,
+      0.26659412404787813,
+      0.28944504896626766,
+      0.3144722524483134,
+      0.3400435255712731,
+      0.36126224156692055,
+      0.38030467899891185,
+      0.41077257889009794,
+      0.4325353645266594,
+      0.455930359085963,
+      0.47932535364526657,
+      0.5016322089227421,
+      0.5244831338411317,
+      0.544613710554951,
+      0.5870511425462459,
+      0.6088139281828074,
+      0.6316648531011969,
+      0.6507072905331882,
+      0.6806311207834603,
+      0.6991294885745375,
+      0.7274211099020674,
+      0.7551686615886833,
+      0.7769314472252449,
+      0.8003264417845484,
+      0.823177366702938,
+      0.8449401523394995,
+      0.8661588683351469,
+      0.9075081610446137,
+      0.9254624591947769,
+      0.9613710554951034,
+      0.9836779107725789,
+      0.998911860718172
+    ],
+    "vertical_lines": [
+      0.0,
+      0.0827178729689808,
+      0.16322008862629248,
+      0.24741506646971936,
+      0.32791728212703103,
+      0.413589364844904,
+      0.5361890694239291,
+      0.6070901033973413,
+      0.691285081240768,
+      1.0
+    ],
+    "table_bbox": [
+      0.09371221281741234,
+      0.14493373236425822,
+      0.9123337363966143,
+      0.9307396323215049
+    ]
+  },
+  "stats": {
+    "row_count": 39,
+    "col_count": 9
+  }
+}

+ 143 - 0
table_line_generator/backend/templates/B用户_扫描流水_v2.template.json

@@ -0,0 +1,143 @@
+{
+  "name": "B用户_扫描流水_v2",
+  "description": "",
+  "created_at": "2025-12-08T21:10:07.289702",
+  "source_image_size": {
+    "width": 1654,
+    "height": 2339
+  },
+  "horizontal_lines": [
+    132,
+    171,
+    244,
+    292,
+    332,
+    373,
+    423,
+    471,
+    515,
+    559,
+    603,
+    667,
+    735,
+    787,
+    827,
+    870,
+    909,
+    954,
+    998,
+    1042,
+    1078,
+    1151,
+    1218,
+    1271,
+    1314,
+    1357,
+    1399,
+    1440,
+    1478,
+    1523,
+    1561,
+    1607,
+    1646,
+    1684,
+    1732,
+    1768,
+    1831,
+    1871,
+    1911,
+    1953,
+    2004,
+    2078,
+    2125,
+    2177
+  ],
+  "vertical_lines": [
+    154,
+    266,
+    375,
+    489,
+    598,
+    714,
+    882,
+    976,
+    1090,
+    1508
+  ],
+  "table_bbox": [
+    154,
+    132,
+    1508,
+    2177
+  ],
+  "relative": {
+    "horizontal_lines": [
+      0.0,
+      0.019070904645476772,
+      0.05476772616136919,
+      0.07823960880195599,
+      0.097799511002445,
+      0.1178484107579462,
+      0.14229828850855747,
+      0.16577017114914425,
+      0.18728606356968217,
+      0.20880195599022006,
+      0.23031784841075795,
+      0.2616136919315403,
+      0.2948655256723716,
+      0.3202933985330073,
+      0.33985330073349634,
+      0.360880195599022,
+      0.3799511002444988,
+      0.4019559902200489,
+      0.42347188264058677,
+      0.4449877750611247,
+      0.4625916870415648,
+      0.4982885085574572,
+      0.5310513447432763,
+      0.5569682151589243,
+      0.5779951100244499,
+      0.5990220048899756,
+      0.619559902200489,
+      0.6396088019559902,
+      0.6581907090464547,
+      0.6801955990220049,
+      0.6987775061124695,
+      0.7212713936430318,
+      0.7403422982885085,
+      0.7589242053789731,
+      0.78239608801956,
+      0.8,
+      0.8308068459657701,
+      0.8503667481662591,
+      0.8699266503667482,
+      0.8904645476772616,
+      0.9154034229828851,
+      0.9515892420537897,
+      0.9745721271393643,
+      1.0
+    ],
+    "vertical_lines": [
+      0.0,
+      0.0827178729689808,
+      0.16322008862629248,
+      0.24741506646971936,
+      0.32791728212703103,
+      0.413589364844904,
+      0.5376661742983752,
+      0.6070901033973413,
+      0.691285081240768,
+      1.0
+    ],
+    "table_bbox": [
+      0.09310761789600967,
+      0.05643437366395896,
+      0.9117291414752116,
+      0.9307396323215049
+    ]
+  },
+  "stats": {
+    "row_count": 43,
+    "col_count": 9
+  }
+}

+ 85 - 0
table_line_generator/backend/templates/康强_北京农村商业银行.template.json

@@ -0,0 +1,85 @@
+{
+  "name": "康强_北京农村商业银行",
+  "description": "",
+  "created_at": "2025-12-08T15:44:38.566819",
+  "source_image_size": {
+    "width": 1654,
+    "height": 2339
+  },
+  "horizontal_lines": [
+    687,
+    733,
+    790,
+    850,
+    908,
+    968,
+    1028,
+    1087,
+    1144,
+    1205,
+    1263,
+    1325,
+    1382,
+    1442,
+    1506
+  ],
+  "vertical_lines": [
+    110,
+    235,
+    376,
+    531,
+    658,
+    774,
+    834,
+    1104,
+    1237,
+    1531
+  ],
+  "table_bbox": [
+    110,
+    687,
+    1531,
+    1505
+  ],
+  "relative": {
+    "horizontal_lines": [
+      0.0,
+      0.05623471882640587,
+      0.12591687041564792,
+      0.19926650366748166,
+      0.2701711491442543,
+      0.343520782396088,
+      0.41687041564792177,
+      0.4889975550122249,
+      0.558679706601467,
+      0.6332518337408313,
+      0.7041564792176039,
+      0.7799511002444988,
+      0.8496332518337408,
+      0.9229828850855746,
+      1.0012224938875305
+    ],
+    "vertical_lines": [
+      0.0,
+      0.08796622097114708,
+      0.18719211822660098,
+      0.2962702322308234,
+      0.3856439127375088,
+      0.4672765657987333,
+      0.5095003518648838,
+      0.6995073891625616,
+      0.7931034482758621,
+      1.0
+    ],
+    "table_bbox": [
+      0.06650544135429262,
+      0.2937152629328773,
+      0.9256348246674728,
+      0.6434373663958957
+    ]
+  },
+  "stats": {
+    "row_count": 14,
+    "col_count": 9
+  }
+}

+ 71 - 0
table_line_generator/backend/templates/施博深_YUSYS统一OCR框架-v2.template.json

@@ -0,0 +1,71 @@
+{
+  "name": "施博深_YUSYS统一OCR框架-v2",
+  "description": "",
+  "created_at": "2025-12-08T14:43:36.684907",
+  "source_image_size": {
+    "width": 2339,
+    "height": 1653
+  },
+  "horizontal_lines": [
+    425,
+    513,
+    560,
+    793,
+    1020,
+    1252
+  ],
+  "vertical_lines": [
+    121,
+    204,
+    328,
+    452,
+    605,
+    695,
+    914,
+    1130,
+    1365,
+    1610,
+    1844,
+    2058
+  ],
+  "table_bbox": [
+    121,
+    425,
+    2058,
+    1252
+  ],
+  "relative": {
+    "horizontal_lines": [
+      0.0,
+      0.10640870616686819,
+      0.16324062877871826,
+      0.4449818621523579,
+      0.7194679564691656,
+      1.0
+    ],
+    "vertical_lines": [
+      0.0,
+      0.04284976768198245,
+      0.10686628807434176,
+      0.1708828084667011,
+      0.24987093443469283,
+      0.2963345379452762,
+      0.40939597315436244,
+      0.5209086215797625,
+      0.642230252968508,
+      0.7687145069695406,
+      0.8895198760970573,
+      1.0
+    ],
+    "table_bbox": [
+      0.05173150919196238,
+      0.2571082879612825,
+      0.8798631893971783,
+      0.7574107683000605
+    ]
+  },
+  "stats": {
+    "row_count": 5,
+    "col_count": 11
+  }
+}

+ 75 - 0
table_line_generator/backend/templates/施博深_page_001.template.json

@@ -0,0 +1,75 @@
+{
+  "name": "施博深_page_001",
+  "description": "相对坐标基于table_box",
+  "created_at": "2025-12-06T19:41:29.874344",
+  "source_image_size": {
+    "width": 2339,
+    "height": 1653
+  },
+  "horizontal_lines": [
+    426,
+    513,
+    716,
+    818,
+    970,
+    1068,
+    1219,
+    1324
+  ],
+  "vertical_lines": [
+    114,
+    202,
+    332,
+    453,
+    604,
+    694,
+    914,
+    1135,
+    1374,
+    1608,
+    1849,
+    2053
+  ],
+  "table_bbox": [
+    114,
+    426,
+    2053,
+    1324
+  ],
+  "relative": {
+    "horizontal_lines": [
+      0.0,
+      0.09688195991091314,
+      0.32293986636971045,
+      0.4365256124721604,
+      0.6057906458797327,
+      0.7149220489977728,
+      0.8830734966592427,
+      1.0
+    ],
+    "vertical_lines": [
+      0.0,
+      0.04538421866941723,
+      0.11242908715832904,
+      0.17483238782877772,
+      0.2527075812274368,
+      0.2991232594120681,
+      0.4125838060856111,
+      0.5265600825167612,
+      0.6498194945848376,
+      0.7705002578648789,
+      0.8947911294481692,
+      1.0
+    ],
+    "table_bbox": [
+      0.04873877725523728,
+      0.2577132486388385,
+      0.8777255237280889,
+      0.8009679370840895
+    ]
+  },
+  "stats": {
+    "row_count": 7,
+    "col_count": 11
+  }
+}