# test_s3.py 1.7 KB
  1. import json
  2. import os
  3. import pytest
  4. from magic_pdf.data.io.s3 import S3Reader, S3Writer
  5. @pytest.mark.skipif(
  6. os.getenv('S3_ACCESS_KEY', None) is None, reason='s3 config not found'
  7. )
  8. def test_s3_reader():
  9. """test s3 reader.
  10. must config s3 config in the environment export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx
  11. export S3_ENDPOINT=xxx
  12. """
  13. bucket = os.getenv('S3_BUCKET', '')
  14. ak = os.getenv('S3_ACCESS_KEY', '')
  15. sk = os.getenv('S3_SECRET_KEY', '')
  16. endpoint_url = os.getenv('S3_ENDPOINT', '')
  17. reader = S3Reader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
  18. bits = reader.read(
  19. 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl'
  20. )
  21. assert len(bits) > 0
  22. bits = reader.read_at(
  23. 'meta-index/scihub/v001/scihub/part-66210c190659-000026.jsonl',
  24. 566,
  25. 713,
  26. )
  27. assert len(json.loads(bits)) > 0
  28. @pytest.mark.skipif(
  29. os.getenv('S3_ACCESS_KEY', None) is None, reason='s3 config not found'
  30. )
  31. def test_s3_writer():
  32. """test s3 reader.
  33. must config s3 config in the environment export S3_BUCKET=xxx export S3_ACCESS_KEY=xxx export S3_SECRET_KEY=xxx
  34. export S3_ENDPOINT=xxx
  35. """
  36. bucket = os.getenv('S3_BUCKET', '')
  37. ak = os.getenv('S3_ACCESS_KEY', '')
  38. sk = os.getenv('S3_SECRET_KEY', '')
  39. endpoint_url = os.getenv('S3_ENDPOINT', '')
  40. writer = S3Writer(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
  41. test_fn = 'unittest/io/test.jsonl'
  42. writer.write(test_fn, '123'.encode())
  43. reader = S3Reader(bucket=bucket, ak=ak, sk=sk, endpoint_url=endpoint_url)
  44. bits = reader.read(test_fn)
  45. assert bits.decode() == '123'