"""
MinerU/PaddleOCR_VL + PaddleOCR merge toolkit.

Importing this package inserts an ancestor directory of this file at the
front of ``sys.path`` (so that ``ocr_utils`` can be imported) and then
re-exports the names listed in ``__all__``.
"""
import sys
from pathlib import Path

# Add the ocr_platform root directory to the Python path (needed to import
# ocr_utils). Parent chain as recorded by the original author:
#   ocr_merger -> ocr_tools -> ocr_platform -> repository.git
# NOTE(review): read literally, that chain makes parents[3] the directory
# *containing* ocr_platform rather than ocr_platform itself -- confirm that
# ocr_utils is importable from the directory inserted here.
ocr_platform_root = Path(__file__).parents[3]
if str(ocr_platform_root) not in sys.path:
    sys.path.insert(0, str(ocr_platform_root))

# These imports deliberately come after the sys.path adjustment above:
# ``ocr_utils`` is imported as a top-level module and relies on that entry.
from .merger_core import MinerUPaddleOCRMerger
from .paddleocr_vl_merger import PaddleOCRVLMerger
from .text_matcher import TextMatcher
from ocr_utils import BBoxExtractor  # imported from ocr_utils
from .data_processor import DataProcessor
from .markdown_generator import MarkdownGenerator
from .unified_output_converter import UnifiedOutputConverter
from .table_cell_matcher import TableCellMatcher

__all__ = [
    'MinerUPaddleOCRMerger',
    'PaddleOCRVLMerger',
    'TextMatcher',
    'BBoxExtractor',  # re-exported to keep backward compatibility
    'DataProcessor',
    'MarkdownGenerator',
    'UnifiedOutputConverter',
    'TableCellMatcher',
]