# __init__.py (1013 B)

  1. """
  2. MinerU/PaddleOCR_VL + PaddleOCR 合并工具包
  3. """
  4. import sys
  5. from pathlib import Path
  6. # 添加 ocr_platform 根目录到 Python 路径(用于导入 ocr_utils)
  7. ocr_platform_root = Path(__file__).parents[3] # ocr_merger -> ocr_tools -> ocr_platform -> repository.git
  8. if str(ocr_platform_root) not in sys.path:
  9. sys.path.insert(0, str(ocr_platform_root))
  10. from .merger_core import MinerUPaddleOCRMerger
  11. from .paddleocr_vl_merger import PaddleOCRVLMerger
  12. from .text_matcher import TextMatcher
  13. from ocr_utils import BBoxExtractor # 从 ocr_utils 导入
  14. from .data_processor import DataProcessor
  15. from .markdown_generator import MarkdownGenerator
  16. from .unified_output_converter import UnifiedOutputConverter
  17. from .table_cell_matcher import TableCellMatcher
  18. __all__ = [
  19. 'MinerUPaddleOCRMerger',
  20. 'PaddleOCRVLMerger',
  21. 'TextMatcher',
  22. 'BBoxExtractor', # 重新导出,保持向后兼容
  23. 'DataProcessor',
  24. 'MarkdownGenerator',
  25. 'UnifiedOutputConverter',
  26. 'TableCellMatcher',
  27. ]