# s3.py (3.5 KB)
  1. import boto3
  2. from botocore.config import Config
  3. from ..io.base import IOReader, IOWriter
  class S3Reader(IOReader):
      """Reader that fetches object content from one S3 bucket via boto3."""

      def __init__(
          self,
          bucket: str,
          ak: str,
          sk: str,
          endpoint_url: str,
          addressing_style: str = 'auto',
      ):
          """s3 reader client.

          Args:
              bucket (str): bucket name
              ak (str): access key
              sk (str): secret key
              endpoint_url (str): endpoint url of s3
              addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual'
              refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
          """
          self._bucket = bucket
          self._ak = ak
          self._sk = sk
          # The client is configured with botocore's 'standard' retry mode and
          # max_attempts=5.
          self._s3_client = boto3.client(
              service_name='s3',
              aws_access_key_id=ak,
              aws_secret_access_key=sk,
              endpoint_url=endpoint_url,
              config=Config(
                  s3={'addressing_style': addressing_style},
                  retries={'max_attempts': 5, 'mode': 'standard'},
              ),
          )

      def read(self, key: str) -> bytes:
          """Read the whole object.

          Args:
              key (str): object key to read

          Returns:
              bytes: the content of the object
          """
          return self.read_at(key)

      def read_at(self, key: str, offset: int = 0, limit: int = -1) -> bytes:
          """Read at offset and limit.

          Args:
              key (str): object key to read
              offset (int, optional): the number of bytes skipped. Defaults to 0.
              limit (int, optional): the length of bytes want to read. A negative
                  value reads to the end of the object. Defaults to -1.

          Returns:
              bytes: the requested content; ``b''`` when ``limit`` is 0
          """
          if limit == 0:
              # A zero-length read has no valid byte range: the range end
              # (offset - 1) would precede its start. Nothing is requested,
              # so answer locally without calling S3.
              return b''

          request = {'Bucket': self._bucket, 'Key': key}
          if limit > 0:
              # HTTP byte ranges are inclusive at both ends.
              request['Range'] = f'bytes={offset}-{offset + limit - 1}'
          elif offset != 0:
              request['Range'] = f'bytes={offset}-'
          # Otherwise (offset == 0, no limit) send no Range header at all: an
          # open-ended ``bytes=0-`` range is unsatisfiable for a zero-length
          # object (HTTP 416), so empty objects would fail to read.

          res = self._s3_client.get_object(**request)
          body = res['Body']
          try:
              return body.read()
          finally:
              # Close the response stream explicitly, even if read() raises.
              body.close()
  class S3Writer(IOWriter):
      """Writer that uploads byte payloads to one S3 bucket via boto3."""

      def __init__(
          self,
          bucket: str,
          ak: str,
          sk: str,
          endpoint_url: str,
          addressing_style: str = 'auto',
      ):
          """s3 writer client.

          Args:
              bucket (str): bucket name
              ak (str): access key
              sk (str): secret key
              endpoint_url (str): endpoint url of s3
              addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual'
              refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
          """
          self._bucket = bucket
          self._ak = ak
          self._sk = sk

          # Build the botocore configuration first, then hand it to the client.
          client_config = Config(
              s3={'addressing_style': addressing_style},
              retries={'max_attempts': 5, 'mode': 'standard'},
          )
          client_kwargs = {
              'service_name': 's3',
              'aws_access_key_id': ak,
              'aws_secret_access_key': sk,
              'endpoint_url': endpoint_url,
              'config': client_config,
          }
          self._s3_client = boto3.client(**client_kwargs)

      def write(self, key: str, data: bytes):
          """Write data to the object stored under key.

          Args:
              key (str): object key to write to
              data (bytes): the data want to write
          """
          put_request = {'Bucket': self._bucket, 'Key': key, 'Body': data}
          self._s3_client.put_object(**put_request)