batch.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. """
  2. 批量处理 API
  3. """
  4. from fastapi import APIRouter, BackgroundTasks, HTTPException
  5. from pydantic import BaseModel, Field
  6. from typing import List, Optional, Dict, Any
  7. from pathlib import Path
  8. from loguru import logger
  9. from services.batch_service import BatchProcessor
  10. from models.schemas import TableStructure, ImageSize
  11. router = APIRouter(prefix="/api/batch", tags=["batch"])
  12. class FilePair(BaseModel):
  13. """文件对"""
  14. json_path: str = Field(description="JSON 文件路径")
  15. image_path: str = Field(description="图片文件路径")
  16. class BatchProcessRequest(BaseModel):
  17. """批量处理请求"""
  18. template_name: str = Field(description="要应用的模板名称")
  19. file_pairs: List[FilePair] = Field(description="文件对列表")
  20. output_dir: str = Field(description="输出目录")
  21. parallel: bool = Field(default=True, description="是否并行处理")
  22. adjust_rows: bool = Field(default=True, description="是否自适应调整行分割")
  23. structure_suffix: str = Field(default="_structure.json", description="结构文件后缀")
  24. image_suffix: str = Field(default="_with_lines.png", description="输出图片文件后缀")
  25. class BatchProcessResult(BaseModel):
  26. """单个文件处理结果"""
  27. success: bool
  28. json_path: str
  29. image_path: str
  30. structure_path: Optional[str] = None
  31. filename: str
  32. rows: Optional[int] = None
  33. cols: Optional[int] = None
  34. error: Optional[str] = None
  35. class BatchProcessResponse(BaseModel):
  36. """批量处理响应"""
  37. success: bool
  38. total: int
  39. processed: int
  40. failed: int
  41. results: List[Dict[str, Any]]
  42. message: Optional[str] = None
  43. class DrawBatchRequest(BaseModel):
  44. """批量绘图请求"""
  45. results: List[Dict[str, Any]] = Field(description="处理结果列表")
  46. line_width: int = Field(default=2, description="线条宽度")
  47. line_color: List[int] = Field(default=[0, 0, 0], description="线条颜色 RGB")
  48. class DrawBatchResponse(BaseModel):
  49. """批量绘图响应"""
  50. success: bool
  51. total: int
  52. drawn: int
  53. results: List[Dict[str, Any]]
  54. message: Optional[str] = None
  55. @router.post("/process", response_model=BatchProcessResponse)
  56. async def batch_process(request: BatchProcessRequest):
  57. """
  58. 批量处理文件
  59. 将指定模板的结构应用到多个文件:
  60. - 复用列边界(竖线)
  61. - 自适应调整行分割(横线)
  62. - 支持并行处理
  63. - 自动处理不同图片尺寸的坐标映射
  64. """
  65. try:
  66. # 验证输出目录
  67. output_path = Path(request.output_dir)
  68. if not output_path.parent.exists():
  69. raise HTTPException(
  70. status_code=400,
  71. detail=f"输出目录的父目录不存在: {output_path.parent}"
  72. )
  73. # 转换文件对格式
  74. file_pairs = [
  75. {
  76. 'json_path': pair.json_path,
  77. 'image_path': pair.image_path
  78. }
  79. for pair in request.file_pairs
  80. ]
  81. # 创建处理器
  82. processor = BatchProcessor(max_workers=4)
  83. # 执行批量处理
  84. summary = processor.process_batch_from_data_source(
  85. template_name=request.template_name,
  86. file_pairs=file_pairs,
  87. output_dir=request.output_dir,
  88. parallel=request.parallel,
  89. adjust_rows=request.adjust_rows,
  90. structure_suffix=request.structure_suffix,
  91. image_suffix=request.image_suffix
  92. )
  93. return BatchProcessResponse(
  94. success=True,
  95. total=summary['total'],
  96. processed=summary['success'],
  97. failed=summary['failed'],
  98. results=summary['results'],
  99. message=f"批量处理完成: 成功 {summary['success']}/{summary['total']}"
  100. )
  101. except HTTPException:
  102. raise
  103. except Exception as e:
  104. logger.exception(f"批量处理失败: {e}")
  105. raise HTTPException(status_code=500, detail=f"批量处理失败: {e}")
  106. @router.post("/draw", response_model=DrawBatchResponse)
  107. async def batch_draw(request: DrawBatchRequest):
  108. """
  109. 批量绘制表格线到图片上
  110. 根据处理结果中的结构文件,在原图上绘制表格线
  111. """
  112. try:
  113. processor = BatchProcessor()
  114. # 确保颜色是 RGB 三元组
  115. line_color = tuple(request.line_color[:3]) if len(request.line_color) >= 3 else (0, 0, 0)
  116. draw_results = processor.draw_batch_images(
  117. results=request.results,
  118. line_width=request.line_width,
  119. line_color=line_color # type: ignore
  120. )
  121. success_count = sum(1 for r in draw_results if r.get('success'))
  122. return DrawBatchResponse(
  123. success=True,
  124. total=len(request.results),
  125. drawn=success_count,
  126. results=draw_results,
  127. message=f"绘制完成: {success_count}/{len(request.results)} 张图片"
  128. )
  129. except Exception as e:
  130. logger.exception(f"批量绘图失败: {e}")
  131. raise HTTPException(status_code=500, detail=f"批量绘图失败: {e}")
  132. @router.get("/health")
  133. async def batch_health_check():
  134. """批量处理服务健康检查"""
  135. return {
  136. "status": "ok",
  137. "service": "batch-processor"
  138. }