editor.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
"""
Editor API routes.
"""
  4. import json
  5. from fastapi import APIRouter, UploadFile, File, HTTPException
  6. from loguru import logger
  7. from models.schemas import (
  8. UploadResponse,
  9. AnalyzeRequest,
  10. AnalyzeResponse,
  11. SaveRequest,
  12. SaveResponse,
  13. TableStructure,
  14. ImageSize,
  15. LoadByPathRequest,
  16. HealthResponse,
  17. )
  18. from services.editor_service import EditorService
# Shared router for the editor endpoints: every route registered on it is
# served under the "/api" prefix and tagged "editor".
router = APIRouter(prefix="/api", tags=["editor"])
  20. @router.post("/upload", response_model=UploadResponse)
  21. async def upload_files(
  22. json_file: UploadFile = File(..., description="OCR JSON 文件"),
  23. image_file: UploadFile = File(..., description="图片文件")
  24. ):
  25. """
  26. 上传 OCR JSON 和图片文件,返回分析结果
  27. - 自动检测 OCR 格式(PPStructure / MinerU)
  28. - 图片超过 4096x4096 会自动缩放
  29. - 返回 base64 编码的图片和表格结构
  30. """
  31. try:
  32. # 验证文件类型
  33. if not json_file.filename.endswith('.json'):
  34. raise HTTPException(status_code=400, detail="请上传 JSON 文件")
  35. allowed_image_types = {'.png', '.jpg', '.jpeg', '.bmp', '.tiff'}
  36. image_ext = '.' + image_file.filename.split('.')[-1].lower()
  37. if image_ext not in allowed_image_types:
  38. raise HTTPException(status_code=400, detail=f"不支持的图片格式: {image_ext}")
  39. # 读取文件内容
  40. json_content = await json_file.read()
  41. image_content = await image_file.read()
  42. logger.info(f"收到上传: JSON={json_file.filename}, Image={image_file.filename}")
  43. # 处理上传,从上传的文件名中提取建议的文件名
  44. result = EditorService.process_upload(
  45. json_content,
  46. image_content,
  47. json_path=json_file.filename
  48. )
  49. return UploadResponse(
  50. success=True,
  51. image_base64=result['image_base64'],
  52. structure=TableStructure(**result['structure']),
  53. image_size=ImageSize(**result['image_size']),
  54. scale_factor=result['scale_factor'],
  55. ocr_data=result['ocr_data'],
  56. suggested_filename=result.get('suggested_filename'),
  57. message="上传成功"
  58. )
  59. except ValueError as e:
  60. logger.error(f"上传处理失败: {e}")
  61. import traceback
  62. logger.error(traceback.format_exc())
  63. raise HTTPException(status_code=400, detail=str(e))
  64. except Exception as e:
  65. logger.exception(f"上传处理异常: {e}")
  66. raise HTTPException(status_code=500, detail=f"服务器错误: {e}")
  67. @router.post("/analyze", response_model=AnalyzeResponse)
  68. async def analyze_structure(request: AnalyzeRequest):
  69. """
  70. 重新分析表格结构(使用不同参数)
  71. """
  72. try:
  73. structure = EditorService.analyze_structure(
  74. request.ocr_data,
  75. request.params.model_dump()
  76. )
  77. return AnalyzeResponse(
  78. success=True,
  79. structure=TableStructure(**structure),
  80. message="分析完成"
  81. )
  82. except Exception as e:
  83. logger.exception(f"分析失败: {e}")
  84. raise HTTPException(status_code=500, detail=f"分析失败: {e}")
  85. @router.post("/save", response_model=SaveResponse)
  86. async def save_result(request: SaveRequest):
  87. """
  88. 保存结果(结构 JSON + 可选的带线图片)
  89. """
  90. try:
  91. paths = EditorService.save_result(
  92. structure=request.structure.model_dump(),
  93. image_base64=request.image_base64,
  94. output_dir=request.output_dir,
  95. filename=request.filename,
  96. image_filename=request.image_filename,
  97. overwrite_mode=request.overwrite_mode,
  98. structure_suffix=request.structure_suffix,
  99. image_suffix=request.image_suffix,
  100. line_width=request.line_width,
  101. line_color=(request.line_color[0], request.line_color[1], request.line_color[2])
  102. )
  103. return SaveResponse(
  104. success=True,
  105. structure_path=paths['structure_path'],
  106. image_path=paths['image_path'],
  107. message="保存成功"
  108. )
  109. except Exception as e:
  110. logger.exception(f"保存失败: {e}")
  111. raise HTTPException(status_code=500, detail=f"保存失败: {e}")
  112. @router.get("/health", response_model=HealthResponse)
  113. async def health_check():
  114. """健康检查"""
  115. return HealthResponse(status="ok", service="table-line-editor")
  116. @router.post("/load-by-path", response_model=UploadResponse)
  117. async def load_by_path(request: LoadByPathRequest):
  118. """按路径加载数据(优先加载标注结果)"""
  119. from pathlib import Path
  120. image_path = Path(request.image_path)
  121. json_path = Path(request.json_path)
  122. output_dir = Path(request.output_dir) if request.output_dir else None
  123. try:
  124. if not image_path.exists():
  125. raise HTTPException(status_code=404, detail=f"图片文件不存在: {image_path}")
  126. if not json_path.exists():
  127. raise HTTPException(status_code=404, detail=f"JSON 文件不存在: {json_path}")
  128. # 使用来自前端的后缀,不再读配置文件
  129. base_name = json_path.stem
  130. # 确保 structure_suffix 以 .json 结尾
  131. structure_suffix = request.structure_suffix
  132. if not structure_suffix.endswith('.json'):
  133. structure_suffix = structure_suffix + '.json'
  134. structure_path = output_dir / f"{base_name}{structure_suffix}" if output_dir else None
  135. with open(json_path, 'rb') as f:
  136. json_content = f.read()
  137. with open(image_path, 'rb') as f:
  138. image_content = f.read()
  139. # 如果存在标注结果,优先加载
  140. if structure_path and structure_path.exists():
  141. logger.info(f"找到标注结果: {structure_path}")
  142. with open(structure_path, 'r', encoding='utf-8') as f:
  143. structure_data = json.load(f)
  144. result = EditorService.process_upload(
  145. json_content,
  146. image_content,
  147. json_path=str(json_path),
  148. annotated_structure=structure_data
  149. )
  150. else:
  151. logger.info(f"未找到标注结果,使用原始OCR数据")
  152. result = EditorService.process_upload(
  153. json_content,
  154. image_content,
  155. json_path=str(json_path)
  156. )
  157. return UploadResponse(
  158. success=True,
  159. image_base64=result['image_base64'],
  160. structure=TableStructure(**result['structure']),
  161. image_size=ImageSize(**result['image_size']),
  162. scale_factor=result['scale_factor'],
  163. ocr_data=result['ocr_data'],
  164. suggested_filename=result.get('suggested_filename'),
  165. message="加载成功"
  166. )
  167. except HTTPException:
  168. raise
  169. except ValueError as e:
  170. logger.error(f"加载处理失败: {e}")
  171. raise HTTPException(status_code=400, detail=str(e))
  172. except Exception as e:
  173. logger.exception(f"加载处理异常: {e}")
  174. raise HTTPException(status_code=500, detail=f"服务器错误: {e}")