| 12345678910111213141516171819202122232425 |
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ParseResult:
    """Unified output structure for a parse result.

    Attributes:
        content: Parsed Markdown text.
        metadata: Metadata such as page count, author or duration.
        file_type: The specific file type that was detected.
        tables: Structured table data extracted from the source.
    """

    content: str = ""  # parsed Markdown text
    # Values are annotated with typing.Any; the builtin ``any`` is a function,
    # not a type, and must not be used in annotations.
    metadata: Dict[str, Any] = field(default_factory=dict)  # page count, author, duration, ...
    file_type: str = ""  # specific detected type
    tables: List[Dict] = field(default_factory=list)  # extracted structured table data

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dictionary.

        The returned dict has the keys ``content``, ``metadata``,
        ``file_type`` and ``tables``. ``metadata`` and ``tables`` are the
        instance's own objects, not copies.
        """
        return {
            "content": self.content,
            "metadata": self.metadata,
            "file_type": self.file_type,
            "tables": self.tables,
        }

    def to_json(self) -> str:
        """Return the result serialized as a JSON string.

        Non-ASCII characters are emitted as-is (``ensure_ascii=False``) and
        the output is pretty-printed with a two-space indent.
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
|