path_utils.py 1.0 KB

1234567891011121314151617181920212223242526272829303132
  1. def remove_non_official_s3_args(s3path):
  2. """
  3. example: s3://abc/xxxx.json?bytes=0,81350 ==> s3://abc/xxxx.json
  4. """
  5. arr = s3path.split("?")
  6. return arr[0]
  7. def parse_s3path(s3path: str):
  8. # from s3pathlib import S3Path
  9. # p = S3Path(remove_non_official_s3_args(s3path))
  10. # return p.bucket, p.key
  11. s3path = remove_non_official_s3_args(s3path).strip()
  12. if s3path.startswith(('s3://', 's3a://')):
  13. prefix, path = s3path.split('://', 1)
  14. bucket_name, key = path.split('/', 1)
  15. return bucket_name, key
  16. elif s3path.startswith('/'):
  17. raise ValueError("The provided path starts with '/'. This does not conform to a valid S3 path format.")
  18. else:
  19. raise ValueError("Invalid S3 path format. Expected 's3://bucket-name/key' or 's3a://bucket-name/key'.")
  20. def parse_s3_range_params(s3path: str):
  21. """
  22. example: s3://abc/xxxx.json?bytes=0,81350 ==> [0, 81350]
  23. """
  24. arr = s3path.split("?bytes=")
  25. if len(arr) == 1:
  26. return None
  27. return arr[1].split(",")