# TXTPipe.py (961 B)
  1. from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
  2. from magic_pdf.libs.json_compressor import JsonCompressor
  3. from magic_pdf.pipe.AbsPipe import AbsPipe
  4. from magic_pdf.user_api import parse_txt_pdf
  5. class TXTPipe(AbsPipe):
  6. def __init__(self, pdf_bytes: bytes, model_list: list, image_writer: AbsReaderWriter, img_parent_path: str, is_debug:bool=False):
  7. super().__init__(pdf_bytes, model_list, image_writer, img_parent_path, is_debug)
  8. def pipe_classify(self):
  9. pass
  10. def pipe_parse(self):
  11. self.pdf_mid_data = parse_txt_pdf(self.pdf_bytes, self.model_list, self.image_writer, is_debug=self.is_debug)
  12. def pipe_mk_uni_format(self):
  13. content_list = AbsPipe.mk_uni_format(self.get_compress_pdf_mid_data(), self.img_parent_path)
  14. return content_list
  15. def pipe_mk_markdown(self):
  16. md_content = AbsPipe.mk_markdown(self.get_compress_pdf_mid_data(), self.img_parent_path)
  17. return md_content