editor.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. """
  2. 编辑器 API 路由
  3. """
  4. import json
  5. from fastapi import APIRouter, UploadFile, File, HTTPException
  6. from loguru import logger
  7. from models.schemas import (
  8. UploadResponse,
  9. AnalyzeRequest,
  10. AnalyzeResponse,
  11. SaveRequest,
  12. SaveResponse,
  13. TableStructure,
  14. ImageSize,
  15. LoadByPathRequest,
  16. HealthResponse,
  17. )
  18. from services.editor_service import EditorService
  19. router = APIRouter(prefix="/api", tags=["editor"])
  @router.post("/upload", response_model=UploadResponse)
  async def upload_files(
      json_file: UploadFile = File(..., description="OCR JSON 文件"),
      image_file: UploadFile = File(..., description="图片文件"),
  ):
      """Upload an OCR JSON file together with its image and return the analysis.

      Both uploads are validated by file extension, read fully into memory and
      handed to ``EditorService.process_upload``; the uploaded JSON filename is
      forwarded as ``json_path``.

      Per the original notes on this endpoint, the service auto-detects the OCR
      format (PPStructure / MinerU) and downscales images larger than 4096x4096;
      that logic lives in the service and is not visible in this module.

      Args:
          json_file: Uploaded OCR result; its name must end in ``.json``
              (case-insensitive).
          image_file: Uploaded image; extension must be one of
              png / jpg / jpeg / bmp / tiff.

      Returns:
          UploadResponse populated from the service result (base64 image, table
          structure, image size, scale factor, OCR data, suggested filename).

      Raises:
          HTTPException: 400 for a rejected file type or a ``ValueError`` from
              the service; 500 for any other failure.
      """
      from pathlib import Path  # function-scope import keeps this block self-contained

      allowed_image_types = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}

      try:
          # UploadFile.filename can be None/empty when the client omits it;
          # treat that as "no extension" instead of crashing with AttributeError.
          json_name = json_file.filename or ""
          image_name = image_file.filename or ""

          if not json_name.lower().endswith('.json'):
              raise HTTPException(status_code=400, detail="请上传 JSON 文件")

          # Path.suffix is empty for extension-less names, so a file literally
          # called "png" is no longer mistaken for a PNG image.
          image_ext = Path(image_name).suffix.lower()
          if image_ext not in allowed_image_types:
              raise HTTPException(status_code=400, detail=f"不支持的图片格式: {image_ext}")

          json_content = await json_file.read()
          image_content = await image_file.read()

          logger.info(f"收到上传: JSON={json_file.filename}, Image={image_file.filename}")

          # The uploaded JSON name is passed through as json_path.
          result = EditorService.process_upload(
              json_content,
              image_content,
              json_path=json_file.filename,
          )

          return UploadResponse(
              success=True,
              image_base64=result['image_base64'],
              structure=TableStructure(**result['structure']),
              image_size=ImageSize(**result['image_size']),
              scale_factor=result['scale_factor'],
              ocr_data=result['ocr_data'],
              suggested_filename=result.get('suggested_filename'),
              message="上传成功",
          )
      except HTTPException:
          # Let the deliberate 400s above through unchanged; without this guard
          # the generic handler below would re-wrap them as 500 errors.
          raise
      except ValueError as e:
          logger.error(f"上传处理失败: {e}")
          raise HTTPException(status_code=400, detail=str(e)) from e
      except Exception as e:
          logger.exception(f"上传处理异常: {e}")
          raise HTTPException(status_code=500, detail=f"服务器错误: {e}") from e
  @router.post("/analyze", response_model=AnalyzeResponse)
  async def analyze_structure(request: AnalyzeRequest):
      """Re-run table structure analysis on the supplied OCR data with new parameters.

      The request's ``params`` model is dumped to a plain dict and passed, together
      with ``ocr_data``, to ``EditorService.analyze_structure``. Any exception is
      logged with its traceback and reported to the client as HTTP 500.
      """
      try:
          ocr_payload = request.ocr_data
          analysis_params = request.params.model_dump()
          analyzed = EditorService.analyze_structure(ocr_payload, analysis_params)
          table = TableStructure(**analyzed)
          return AnalyzeResponse(success=True, structure=table, message="分析完成")
      except Exception as e:
          logger.exception(f"分析失败: {e}")
          raise HTTPException(status_code=500, detail=f"分析失败: {e}")
  @router.post("/save", response_model=SaveResponse)
  async def save_result(request: SaveRequest):
      """Persist the edited result: the structure JSON plus an optional line-drawn image.

      All save options are taken from the request and forwarded as keyword
      arguments to ``EditorService.save_result``; the paths it returns are echoed
      back in the response. Any exception is logged with its traceback and
      reported to the client as HTTP 500.
      """
      try:
          rgb = request.line_color
          save_kwargs = {
              "structure": request.structure.model_dump(),
              "image_base64": request.image_base64,
              "output_dir": request.output_dir,
              "filename": request.filename,
              "image_filename": request.image_filename,
              "overwrite_mode": request.overwrite_mode,
              "structure_suffix": request.structure_suffix,
              "image_suffix": request.image_suffix,
              "line_width": request.line_width,
              # Only the first three components are forwarded, as a tuple.
              "line_color": (rgb[0], rgb[1], rgb[2]),
          }
          saved = EditorService.save_result(**save_kwargs)

          return SaveResponse(
              success=True,
              structure_path=saved['structure_path'],
              image_path=saved['image_path'],
              message="保存成功",
          )
      except Exception as e:
          logger.exception(f"保存失败: {e}")
          raise HTTPException(status_code=500, detail=f"保存失败: {e}")
  @router.get("/health", response_model=HealthResponse)
  async def health_check():
      """Health probe: always reports status "ok" for the table-line-editor service."""
      status_report = HealthResponse(status="ok", service="table-line-editor")
      return status_report
  @router.post("/load-by-path", response_model=UploadResponse)
  async def load_by_path(request: LoadByPathRequest):
      """Load an image / OCR JSON pair from filesystem paths given in the request.

      If ``output_dir`` is set and it contains a previously saved annotation file
      named ``<json stem><structure_suffix>`` (``.json`` is appended to the suffix
      when missing), that annotation is loaded and passed to the service as
      ``annotated_structure``; otherwise only the raw OCR data is used.

      NOTE(review): the paths come straight from the request and are read as-is;
      this presumably assumes a trusted/local deployment — confirm.

      Args:
          request: Carries ``image_path``, ``json_path``, optional ``output_dir``
              and ``structure_suffix``.

      Returns:
          UploadResponse populated from the service result.

      Raises:
          HTTPException: 404 when the image or JSON path is not an existing file;
              400 for a ``ValueError`` (including malformed JSON in the annotation
              file); 500 for any other failure.
      """
      from pathlib import Path

      image_path = Path(request.image_path)
      json_path = Path(request.json_path)
      output_dir = Path(request.output_dir) if request.output_dir else None

      try:
          # is_file() rather than exists(): a directory path would pass exists()
          # and then fail inside the read below, surfacing as a 500 instead of 404.
          if not image_path.is_file():
              raise HTTPException(status_code=404, detail=f"图片文件不存在: {image_path}")
          if not json_path.is_file():
              raise HTTPException(status_code=404, detail=f"JSON 文件不存在: {json_path}")

          # The suffix comes from the request (frontend), not from a config file;
          # make sure it ends with .json.
          structure_suffix = request.structure_suffix
          if not structure_suffix.endswith('.json'):
              structure_suffix += '.json'

          structure_path = (
              output_dir / f"{json_path.stem}{structure_suffix}" if output_dir else None
          )

          json_content = json_path.read_bytes()
          image_content = image_path.read_bytes()

          # Only pass annotated_structure when an annotation file was actually
          # found, so the service's own default applies otherwise.
          extra_kwargs = {}
          if structure_path is not None and structure_path.is_file():
              logger.info(f"找到标注结果: {structure_path}")
              with open(structure_path, 'r', encoding='utf-8') as f:
                  extra_kwargs['annotated_structure'] = json.load(f)
          else:
              logger.info("未找到标注结果,使用原始OCR数据")

          result = EditorService.process_upload(
              json_content,
              image_content,
              json_path=str(json_path),
              **extra_kwargs,
          )

          return UploadResponse(
              success=True,
              image_base64=result['image_base64'],
              structure=TableStructure(**result['structure']),
              image_size=ImageSize(**result['image_size']),
              scale_factor=result['scale_factor'],
              ocr_data=result['ocr_data'],
              suggested_filename=result.get('suggested_filename'),
              message="加载成功",
          )
      except HTTPException:
          # Preserve the deliberate 404s raised above.
          raise
      except ValueError as e:
          logger.error(f"加载处理失败: {e}")
          raise HTTPException(status_code=400, detail=str(e)) from e
      except Exception as e:
          logger.exception(f"加载处理异常: {e}")
          raise HTTPException(status_code=500, detail=f"服务器错误: {e}") from e