# path_utils.py
# Copyright (c) Opendatalab. All rights reserved.
  2. def remove_non_official_s3_args(s3path):
  3. """
  4. example: s3://abc/xxxx.json?bytes=0,81350 ==> s3://abc/xxxx.json
  5. """
  6. arr = s3path.split("?")
  7. return arr[0]
  8. def parse_s3path(s3path: str):
  9. # from s3pathlib import S3Path
  10. # p = S3Path(remove_non_official_s3_args(s3path))
  11. # return p.bucket, p.key
  12. s3path = remove_non_official_s3_args(s3path).strip()
  13. if s3path.startswith(('s3://', 's3a://')):
  14. prefix, path = s3path.split('://', 1)
  15. bucket_name, key = path.split('/', 1)
  16. return bucket_name, key
  17. elif s3path.startswith('/'):
  18. raise ValueError("The provided path starts with '/'. This does not conform to a valid S3 path format.")
  19. else:
  20. raise ValueError("Invalid S3 path format. Expected 's3://bucket-name/key' or 's3a://bucket-name/key'.")
  21. def parse_s3_range_params(s3path: str):
  22. """
  23. example: s3://abc/xxxx.json?bytes=0,81350 ==> [0, 81350]
  24. """
  25. arr = s3path.split("?bytes=")
  26. if len(arr) == 1:
  27. return None
  28. return arr[1].split(",")