import os from magic_pdf.data.data_reader_writer.base import DataReader, DataWriter class FileBasedDataReader(DataReader): def __init__(self, parent_dir: str = ''): """Initialized with parent_dir. Args: parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''. """ self._parent_dir = parent_dir def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: """Read at offset and limit. Args: path (str): the path of file, if the path is relative path, it will be joined with parent_dir. offset (int, optional): the number of bytes skipped. Defaults to 0. limit (int, optional): the length of bytes want to read. Defaults to -1. Returns: bytes: the content of file """ fn_path = path if not os.path.isabs(fn_path) and len(self._parent_dir) > 0: fn_path = os.path.join(self._parent_dir, path) with open(fn_path, 'rb') as f: f.seek(offset) if limit == -1: return f.read() else: return f.read(limit) class FileBasedDataWriter(DataWriter): def __init__(self, parent_dir: str = '') -> None: """Initialized with parent_dir. Args: parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''. """ self._parent_dir = parent_dir def write(self, path: str, data: bytes) -> None: """Write file with data. Args: path (str): the path of file, if the path is relative path, it will be joined with parent_dir. data (bytes): the data want to write """ fn_path = path if not os.path.isabs(fn_path) and len(self._parent_dir) > 0: fn_path = os.path.join(self._parent_dir, path) with open(fn_path, 'wb') as f: f.write(data)