# __init__.py (894 B)

  1. """
  2. Universal Document Parser
  3. 统一文档处理流水线,支持多种模型(MinerU、PaddleX、DotsOCR等)进行文档解析。
  4. 提供完整的处理流程:PDF分类、页面方向识别、Layout检测、OCR识别、表格VLM识别等。
  5. """
  6. from .core.pipeline_manager_v2 import EnhancedDocPipeline
  7. from .core.pipeline_manager_v2_streaming import StreamingDocPipeline
  8. from .core.config_manager import ConfigManager
  9. from .core.model_factory import ModelFactory
  10. # 从 ocr_utils 导入工具函数
  11. try:
  12. from ocr_utils import OutputFormatterV2, save_mineru_format
  13. except ImportError:
  14. # 降级:从 utils 导入(向后兼容)
  15. from .utils import OutputFormatterV2, save_mineru_format
  16. __all__ = [
  17. 'EnhancedDocPipeline',
  18. 'StreamingDocPipeline',
  19. 'ConfigManager',
  20. 'ModelFactory',
  21. 'OutputFormatterV2',
  22. 'save_mineru_format',
  23. ]
  24. __version__ = "2.0.0"