filebase.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. import os
  2. from magic_pdf.data.data_reader_writer.base import DataReader, DataWriter
  3. class FileBasedDataReader(DataReader):
  4. def __init__(self, parent_dir: str = ''):
  5. """Initialized with parent_dir.
  6. Args:
  7. parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
  8. """
  9. self._parent_dir = parent_dir
  10. def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes:
  11. """Read at offset and limit.
  12. Args:
  13. path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
  14. offset (int, optional): the number of bytes skipped. Defaults to 0.
  15. limit (int, optional): the length of bytes want to read. Defaults to -1.
  16. Returns:
  17. bytes: the content of file
  18. """
  19. fn_path = path
  20. if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
  21. fn_path = os.path.join(self._parent_dir, path)
  22. with open(fn_path, 'rb') as f:
  23. f.seek(offset)
  24. if limit == -1:
  25. return f.read()
  26. else:
  27. return f.read(limit)
  28. class FileBasedDataWriter(DataWriter):
  29. def __init__(self, parent_dir: str = '') -> None:
  30. """Initialized with parent_dir.
  31. Args:
  32. parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
  33. """
  34. self._parent_dir = parent_dir
  35. def write(self, path: str, data: bytes) -> None:
  36. """Write file with data.
  37. Args:
  38. path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
  39. data (bytes): the data want to write
  40. """
  41. fn_path = path
  42. if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
  43. fn_path = os.path.join(self._parent_dir, path)
  44. with open(fn_path, 'wb') as f:
  45. f.write(data)