# pdf_parse_by_ocr_v2.py (637 B)

  1. from magic_pdf.pdf_parse_union_core import pdf_parse_union
  def parse_pdf_by_ocr(
      pdf_bytes,
      model_list,
      imageWriter,
      start_page_id=0,
      end_page_id=None,
      debug_mode=False,
  ):
      """Delegate to ``pdf_parse_union`` with the parse mode fixed to ``"ocr"``.

      This function adds no logic of its own: every argument is forwarded
      unchanged and the result of ``pdf_parse_union`` is returned as-is.

      Args:
          pdf_bytes: Forwarded as the first positional argument
              (presumably the raw PDF content -- confirm against
              ``pdf_parse_union``).
          model_list: Forwarded as the second positional argument.
          imageWriter: Forwarded as the third positional argument.
          start_page_id: Forwarded by keyword; defaults to ``0``.
          end_page_id: Forwarded by keyword; defaults to ``None``.
          debug_mode: Forwarded by keyword; defaults to ``False``.

      Returns:
          Whatever ``pdf_parse_union`` returns for these arguments.
      """
      # The mode string is the only thing this wrapper contributes.
      parse_mode = "ocr"

      # Options that are passed through by keyword rather than by position.
      passthrough_options = {
          "start_page_id": start_page_id,
          "end_page_id": end_page_id,
          "debug_mode": debug_mode,
      }

      return pdf_parse_union(
          pdf_bytes,
          model_list,
          imageWriter,
          parse_mode,
          **passthrough_options,
      )