S3ReaderWriter.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. from magic_pdf.io.AbsReaderWriter import AbsReaderWriter
  2. from magic_pdf.libs.commons import parse_aws_param, parse_bucket_key
  3. import boto3
  4. from loguru import logger
  5. from boto3.s3.transfer import TransferConfig
  6. from botocore.config import Config
  7. class S3ReaderWriter(AbsReaderWriter):
  8. def __init__(self, s3_profile):
  9. self.client = self._get_client(s3_profile)
  10. def _get_client(self, s3_profile):
  11. ak, sk, end_point, addressing_style = parse_aws_param(s3_profile)
  12. s3_client = boto3.client(
  13. service_name="s3",
  14. aws_access_key_id=ak,
  15. aws_secret_access_key=sk,
  16. endpoint_url=end_point,
  17. config=Config(s3={"addressing_style": addressing_style},
  18. retries={'max_attempts': 5, 'mode': 'standard'}),
  19. )
  20. return s3_client
  21. def read(self, s3_path, mode="text", encoding="utf-8"):
  22. bucket_name, bucket_key = parse_bucket_key(s3_path)
  23. res = self.client.get_object(Bucket=bucket_name, Key=bucket_key)
  24. body = res["Body"].read()
  25. if mode == 'text':
  26. data = body.decode(encoding) # Decode bytes to text
  27. elif mode == 'binary':
  28. data = body
  29. else:
  30. raise ValueError("Invalid mode. Use 'text' or 'binary'.")
  31. return data
  32. def write(self, data, s3_path, mode="text", encoding="utf-8"):
  33. if mode == 'text':
  34. body = data.encode(encoding) # Encode text data as bytes
  35. elif mode == 'binary':
  36. body = data
  37. else:
  38. raise ValueError("Invalid mode. Use 'text' or 'binary'.")
  39. bucket_name, bucket_key = parse_bucket_key(s3_path)
  40. self.client.put_object(Body=body, Bucket=bucket_name, Key=bucket_key)
  41. logger.info(f"内容已写入 {s3_path} ")
  42. if __name__ == "__main__":
  43. # Config the connection info
  44. profile = {
  45. 'ak': '',
  46. 'sk': '',
  47. 'endpoint': ''
  48. }
  49. # Create an S3ReaderWriter object
  50. s3_reader_writer = S3ReaderWriter(profile)
  51. # Write text data to S3
  52. text_data = "This is some text data"
  53. s3_reader_writer.write(data=text_data, s3_path = "s3://bucket_name/ebook/test/test.json", mode='text')
  54. # Read text data from S3
  55. text_data_read = s3_reader_writer.read(s3_path = "s3://bucket_name/ebook/test/test.json", mode='text')
  56. logger.info(f"Read text data from S3: {text_data_read}")
  57. # Write binary data to S3
  58. binary_data = b"This is some binary data"
  59. s3_reader_writer.write(data=text_data, s3_path = "s3://bucket_name/ebook/test/test2.json", mode='binary')
  60. # Read binary data from S3
  61. binary_data_read = s3_reader_writer.read(s3_path = "s3://bucket_name/ebook/test/test2.json", mode='binary')
  62. logger.info(f"Read binary data from S3: {binary_data_read}")