pdf_parse_by_txt.py 537 B

12345678910111213141516171819
  1. from magic_pdf.pdf_parse_union_core import pdf_parse_union
  2. def parse_pdf_by_txt(
  3. pdf_bytes,
  4. model_list,
  5. imageWriter,
  6. start_page_id=0,
  7. end_page_id=None,
  8. debug_mode=False,
  9. ):
  10. return pdf_parse_union(pdf_bytes,
  11. model_list,
  12. imageWriter,
  13. "txt",
  14. start_page_id=start_page_id,
  15. end_page_id=end_page_id,
  16. debug_mode=debug_mode,
  17. )