| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- import os
- from magic_pdf.data.data_reader_writer.base import DataReader, DataWriter
- class FileBasedDataReader(DataReader):
- def __init__(self, parent_dir: str = ''):
- """Initialized with parent_dir.
- Args:
- parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
- """
- self._parent_dir = parent_dir
- def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes:
- """Read at offset and limit.
- Args:
- path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
- offset (int, optional): the number of bytes skipped. Defaults to 0.
- limit (int, optional): the length of bytes want to read. Defaults to -1.
- Returns:
- bytes: the content of file
- """
- fn_path = path
- if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
- fn_path = os.path.join(self._parent_dir, path)
- with open(fn_path, 'rb') as f:
- f.seek(offset)
- if limit == -1:
- return f.read()
- else:
- return f.read(limit)
- class FileBasedDataWriter(DataWriter):
- def __init__(self, parent_dir: str = '') -> None:
- """Initialized with parent_dir.
- Args:
- parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''.
- """
- self._parent_dir = parent_dir
- def write(self, path: str, data: bytes) -> None:
- """Write file with data.
- Args:
- path (str): the path of file, if the path is relative path, it will be joined with parent_dir.
- data (bytes): the data want to write
- """
- fn_path = path
- if not os.path.isabs(fn_path) and len(self._parent_dir) > 0:
- fn_path = os.path.join(self._parent_dir, path)
- if not os.path.exists(os.path.dirname(fn_path)) and os.path.dirname(fn_path) != "":
- os.makedirs(os.path.dirname(fn_path), exist_ok=True)
- with open(fn_path, 'wb') as f:
- f.write(data)
|