""" OCR 工具包 整合了文档处理相关的工具函数,包括: - PDF 处理工具 - JSON/Markdown/HTML 格式化工具 - 文件处理工具 - 数字标准化工具 """ from .pdf_utils import PDFUtils from .json_formatters import JSONFormatters from .markdown_generator import MarkdownGenerator from .html_generator import HTMLGenerator from .visualization_utils import VisualizationUtils from .output_formatter_v2 import OutputFormatterV2, save_mineru_format from .pdf_extractor import extract_pdf_pages from .normalize_financial_numbers import ( normalize_financial_numbers, normalize_json_table, normalize_markdown_table, normalize_json_file ) from .file_utils import ( get_input_files, collect_pid_files, get_image_files_from_dir, get_image_files_from_list, get_image_files_from_csv, convert_pdf_to_images, split_files, create_temp_file_list ) from .log_utils import setup_logging __all__ = [ # PDF 工具 'PDFUtils', 'extract_pdf_pages', # JSON 格式化 'JSONFormatters', # Markdown 生成 'MarkdownGenerator', # HTML 生成 'HTMLGenerator', # 可视化 'VisualizationUtils', # 输出格式化 'OutputFormatterV2', 'save_mineru_format', # 数字标准化 'normalize_financial_numbers', 'normalize_json_table', 'normalize_markdown_table', 'normalize_json_file', # 文件工具 'get_input_files', 'collect_pid_files', 'get_image_files_from_dir', 'get_image_files_from_list', 'get_image_files_from_csv', 'convert_pdf_to_images', 'split_files', 'create_temp_file_list', # 日志工具 'setup_logging', ] __version__ = "1.0.0" __author__ = "zhch158"