result.py 868 B

12345678910111213141516171819202122232425
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
  3. @dataclass
  4. class ParseResult:
  5. """解析结果的统一输出结构"""
  6. content: str = "" # 解析出的 Markdown 文本
  7. metadata: Dict[str, any] = field(default_factory=dict) # 页数、作者、时长等元数据
  8. file_type: str = "" # 识别出的具体类型
  9. tables: List[Dict] = field(default_factory=list) # 提取出的结构化表格数据
  10. def to_dict(self) -> Dict[str, any]:
  11. """转换为字典格式"""
  12. return {
  13. "content": self.content,
  14. "metadata": self.metadata,
  15. "file_type": self.file_type,
  16. "tables": self.tables
  17. }
  18. def to_json(self) -> str:
  19. """转换为JSON字符串"""
  20. import json
  21. return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)