TXTPipe.py 1.1 KB

123456789101112131415161718192021222324252627282930
  1. from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
  2. from magic_pdf.libs.json_compressor import JsonCompressor
  3. from magic_pdf.pipe.AbsPipe import AbsPipe
  4. from magic_pdf.user_api import parse_txt_pdf
  5. class TXTPipe(AbsPipe):
  6. def __init__(self, pdf_bytes: bytes, model_list: list, image_writer: AbsReaderWriter, img_bucket_path: str):
  7. self.compressed_pdf_mid_data = None
  8. self.pdf_mid_data = None
  9. self.pdf_bytes = pdf_bytes
  10. self.model_list = model_list
  11. self.image_writer = image_writer
  12. self.img_bucket_path = img_bucket_path
  13. def pipe_classify(self):
  14. pass
  15. def pipe_parse(self):
  16. self.pdf_mid_data = parse_txt_pdf(self.pdf_bytes, self.model_list, self.image_writer)
  17. self.compressed_pdf_mid_data = JsonCompressor.compress_json(self.pdf_mid_data)
  18. def pipe_mk_uni_format(self):
  19. content_list = AbsPipe.mk_uni_format(self.compressed_pdf_mid_data, self.img_bucket_path)
  20. return content_list
  21. def pipe_mk_markdown(self):
  22. md_content = AbsPipe.mk_markdown(self.compressed_pdf_mid_data, self.img_bucket_path)
  23. return md_content