utils.py 698 B

123456789101112131415161718192021222324
  1. import os
  2. from loguru import logger
  3. from mineru.utils.table_merge import merge_table
  4. def cross_page_table_merge(pdf_info: list[dict]):
  5. """Merge tables that span across multiple pages in a PDF document.
  6. Args:
  7. pdf_info (list[dict]): A list of dictionaries containing information about each page in the PDF.
  8. Returns:
  9. None
  10. """
  11. is_merge_table = os.getenv('MINERU_TABLE_MERGE_ENABLE', 'true')
  12. if is_merge_table.lower() in ['true', '1', 'yes']:
  13. merge_table(pdf_info)
  14. elif is_merge_table.lower() in ['false', '0', 'no']:
  15. pass
  16. else:
  17. logger.warning(f'unknown MINERU_TABLE_MERGE_ENABLE config: {is_merge_table}, pass')
  18. pass