| 123456789101112131415161718192021222324252627282930 |
"""
Universal Document Parser

Unified document-processing pipeline that supports multiple models
(MinerU, PaddleX, DotsOCR, etc.) for document parsing.

Provides a complete processing flow: PDF classification, page-orientation
detection, layout detection, OCR, table recognition via VLM, and more.
"""

from .core.pipeline_manager_v2 import EnhancedDocPipeline
from .core.pipeline_manager_v2_streaming import StreamingDocPipeline
from .core.config_manager import ConfigManager
from .core.model_factory import ModelFactory

# Prefer the helpers from the top-level ``ocr_utils`` package.
try:
    from ocr_utils import OutputFormatterV2, save_mineru_format
except ImportError:
    # Fallback: import the same names from the package-local ``utils``
    # module (kept for backward compatibility).
    from .utils import OutputFormatterV2, save_mineru_format

# Names re-exported as this package's public API.
__all__ = [
    'EnhancedDocPipeline',
    'StreamingDocPipeline',
    'ConfigManager',
    'ModelFactory',
    'OutputFormatterV2',
    'save_mineru_format',
]

__version__ = "2.0.0"
|