python-boto3/tests/integration/test_s3.py
2021-11-03 10:27:47 -07:00

703 lines
26 KiB
Python

# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# https://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import os
import threading
import math
import tempfile
import shutil
import hashlib
import string
import datetime
import logging
from tests import unittest, unique_id
from botocore.compat import six
from botocore.client import Config
import boto3.session
import boto3.s3.transfer
# ``urlopen`` is resolved through ``six.moves`` so the same name can be used
# on both Python 2 and Python 3.
urlopen = six.moves.urllib.request.urlopen
# Logger used by the module-level helpers below for their debug output.
LOG = logging.getLogger('boto3.tests.integration')
def assert_files_equal(first, second):
    """Raise ``AssertionError`` unless two files hold the same content.

    The file sizes are compared first; MD5 digests are only computed
    (via ``md5_checksum``) when the sizes match.

    :param first: Path of the first file.
    :param second: Path of the second file.
    :raises AssertionError: If the sizes or the MD5 digests differ.
    """
    sizes_match = os.path.getsize(first) == os.path.getsize(second)
    if not sizes_match:
        message = "Files are not equal: %s, %s" % (first, second)
        raise AssertionError(message)

    digests = (md5_checksum(first), md5_checksum(second))
    if digests[0] == digests[1]:
        return
    raise AssertionError(
        "Files are not equal: %s(md5=%s) != %s(md5=%s)" % (
            first, digests[0], second, digests[1]))
def md5_checksum(filename):
    """Return the hexadecimal MD5 digest of the file at ``filename``.

    The file is read in binary mode in 8192-byte blocks so that it never
    has to be held in memory all at once.
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        while True:
            block = stream.read(8192)
            if block == b'':
                break
            digest.update(block)
    return digest.hexdigest()
def random_bucket_name(prefix='boto3-transfer', num_chars=10):
    """Return ``prefix`` followed by ``num_chars`` random characters.

    The random characters are lowercase ASCII letters and digits, each
    picked by mapping one byte from ``os.urandom`` onto that alphabet.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(
        alphabet[value % len(alphabet)]
        for value in bytearray(os.urandom(num_chars))
    )
    return prefix + suffix
# Randomly named bucket, generated once at import time, that setup_module()
# creates and that the test classes in this module share.
_SHARED_BUCKET = random_bucket_name()
# Region used as the shared bucket's LocationConstraint and for the sessions
# and clients the tests build.
_DEFAULT_REGION = 'us-west-2'
def setup_module():
    """Create the shared test bucket and wait until S3 reports it exists."""
    client = boto3.client('s3')
    bucket_exists = client.get_waiter('bucket_exists')
    location = {'LocationConstraint': _DEFAULT_REGION}
    try:
        client.create_bucket(
            Bucket=_SHARED_BUCKET,
            CreateBucketConfiguration=location
        )
    except Exception as error:
        # create_bucket can fail for several reasons, so the failure is
        # only logged here; the waiter below has the final say on whether
        # the bucket actually exists.
        LOG.debug(
            "create_bucket() raised an exception: %s", error, exc_info=True)
    bucket_exists.wait(Bucket=_SHARED_BUCKET)
def clear_out_bucket(bucket, region, delete_bucket=False):
    """Delete every listed object in ``bucket`` and optionally the bucket.

    :param bucket: Name of the bucket to empty.
    :param region: Region name used to build the S3 client.
    :param delete_bucket: When true, also delete the bucket and wait for
        the ``bucket_not_exists`` waiter to succeed.
    """
    client = boto3.client('s3', region_name=region)
    paginator = client.get_paginator('list_objects')
    # Each listing page is removed with a single batch delete_objects() call.
    for listing in paginator.paginate(Bucket=bucket):
        batch = [
            {'Key': entry['Key']} for entry in listing.get('Contents', [])
        ]
        if not batch:
            continue
        client.delete_objects(Bucket=bucket, Delete={'Objects': batch})

    if not delete_bucket:
        return

    try:
        client.delete_bucket(Bucket=bucket)
    except Exception as error:
        # Deleting a bucket sometimes raises. The error is only logged;
        # the waiter below decides whether the bucket is really gone.
        LOG.debug("delete_bucket() raised an exception: %s",
                  error, exc_info=True)
    client.get_waiter('bucket_not_exists').wait(Bucket=bucket)
def teardown_module():
    """Empty the shared test bucket and then delete it."""
    clear_out_bucket(
        bucket=_SHARED_BUCKET,
        region=_DEFAULT_REGION,
        delete_bucket=True
    )
class FileCreator(object):
    """Create files inside a private temporary directory.

    The directory is created on construction and is available as
    ``rootdir``; call ``remove_all()`` to delete it and its contents.
    """

    def __init__(self):
        self.rootdir = tempfile.mkdtemp()

    def remove_all(self):
        """Delete the temporary directory and everything inside it."""
        shutil.rmtree(self.rootdir)

    def _prepare_path(self, filename):
        """Return the full path for ``filename``, creating parent dirs."""
        path = os.path.join(self.rootdir, filename)
        parent = os.path.dirname(path)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        return path

    def create_file(self, filename, contents, mode='w'):
        """Creates a file in a tmpdir

        ``filename`` should be a relative path, e.g. "foo/bar/baz.txt"
        It will be translated into a full path in a tmp dir.

        ``mode`` is the mode the file should be opened either as ``w`` or
        ``wb``.

        Returns the full path to the file.
        """
        path = self._prepare_path(filename)
        with open(path, mode) as f:
            f.write(contents)
        return path

    def create_file_with_size(self, filename, filesize):
        """Create a file of exactly ``filesize`` bytes, filled with ``a``.

        ``filename`` should be a relative path; missing parent directories
        are created.  ``filesize`` is a non-negative integer number of
        bytes; zero (or a negative value) produces an empty file.

        Returns the full path to the file.
        """
        path = self._prepare_path(filename)
        chunksize = 8192
        chunk = b'a' * chunksize
        remaining = filesize
        with open(path, 'wb') as f:
            while remaining > 0:
                # The final write is truncated so the file ends up with
                # exactly ``filesize`` bytes rather than a whole number
                # of chunks.
                size = min(chunksize, remaining)
                f.write(chunk if size == chunksize else chunk[:size])
                remaining -= size
        return path

    def append_file(self, filename, contents):
        """Append contents to a file

        ``filename`` should be a relative path, e.g. "foo/bar/baz.txt"
        It will be translated into a full path in a tmp dir.

        Returns the full path to the file.
        """
        path = self._prepare_path(filename)
        with open(path, 'a') as f:
            f.write(contents)
        return path

    def full_path(self, filename):
        """Translate relative path to full path in temp dir.

        f.full_path('foo/bar.txt') -> /tmp/asdfasd/foo/bar.txt
        """
        return os.path.join(self.rootdir, filename)
class TestS3Resource(unittest.TestCase):
    """Integration tests for the S3 resource interface.

    Every test starts with the shared bucket emptied by
    ``clear_out_bucket`` in ``setUp``.
    """

    def setUp(self):
        self.region = _DEFAULT_REGION
        self.bucket_name = _SHARED_BUCKET
        clear_out_bucket(self.bucket_name, self.region)
        self.session = boto3.session.Session(region_name=self.region)
        self.s3 = self.session.resource('s3')
        self.bucket = self.s3.Bucket(self.bucket_name)

    def create_bucket_resource(self, bucket_name=None, region=None):
        """Create a bucket, register its deletion as cleanup, and return it.

        :param bucket_name: Name for the new bucket; a random name is
            generated when omitted.
        :param region: Region for the bucket; defaults to ``self.region``.
        :returns: The created ``Bucket`` resource.
        """
        if bucket_name is None:
            bucket_name = random_bucket_name()

        if region is None:
            region = self.region

        kwargs = {'Bucket': bucket_name}
        # No LocationConstraint is sent for us-east-1.
        if region != 'us-east-1':
            kwargs['CreateBucketConfiguration'] = {
                'LocationConstraint': region
            }

        bucket = self.s3.create_bucket(**kwargs)
        self.addCleanup(bucket.delete)
        # Require several consecutive successful waits before handing the
        # bucket to the caller.
        for _ in range(3):
            bucket.wait_until_exists()
        return bucket

    def test_s3(self):
        client = self.s3.meta.client

        # Create an object
        obj = self.bucket.Object('test.txt')
        obj.put(
            Body='hello, world')
        waiter = client.get_waiter('object_exists')
        waiter.wait(Bucket=self.bucket_name, Key='test.txt')
        self.addCleanup(obj.delete)

        # List objects and make sure ours is present
        self.assertIn('test.txt', [o.key for o in self.bucket.objects.all()])

        # Lazy-loaded attribute
        self.assertEqual(12, obj.content_length)

        # Load a similar attribute from the collection response
        self.assertEqual(12, list(self.bucket.objects.all())[0].size)

        # Perform a resource action with a low-level response
        self.assertEqual(b'hello, world',
                         obj.get()['Body'].read())

    def test_s3_resource_waiter(self):
        # Create a bucket
        bucket_name = random_bucket_name()
        bucket = self.create_bucket_resource(bucket_name)
        # Wait till the bucket exists
        bucket.wait_until_exists()

        # Confirm the bucket exists by finding it in a list of all of our
        # buckets
        self.assertIn(bucket_name,
                      [b.name for b in self.s3.buckets.all()])

        # Create an object
        obj = bucket.Object('test.txt')
        obj.put(
            Body='hello, world')
        self.addCleanup(obj.delete)

        # Wait till the object exists
        obj.wait_until_exists()

        # List objects and make sure ours is present
        self.assertIn('test.txt', [o.key for o in bucket.objects.all()])

    def test_can_create_object_directly(self):
        obj = self.s3.Object(self.bucket_name, 'test.txt')

        self.assertEqual(obj.bucket_name, self.bucket_name)
        self.assertEqual(obj.key, 'test.txt')

    def test_s3_multipart(self):
        # Create the multipart upload
        mpu = self.bucket.Object('mp-test.txt').initiate_multipart_upload()
        self.addCleanup(mpu.abort)

        # Create and upload a part
        part = mpu.Part(1)
        response = part.upload(Body='hello, world!')

        # Complete the upload, which requires info on all of the parts
        part_info = {
            'Parts': [
                {
                    'PartNumber': 1,
                    'ETag': response['ETag']
                }
            ]
        }

        mpu.complete(MultipartUpload=part_info)
        self.addCleanup(self.bucket.Object('mp-test.txt').delete)

        contents = self.bucket.Object('mp-test.txt').get()['Body'].read()
        self.assertEqual(contents, b'hello, world!')

    def test_s3_batch_delete(self):
        # Create a dedicated bucket with versioning enabled
        bucket = self.create_bucket_resource()
        bucket.Versioning().enable()

        # Create several versions of an object.  The object must live in
        # the versioned bucket created above (not the shared bucket),
        # otherwise the batch delete below has nothing to remove and the
        # final assertion passes without testing anything.
        obj = bucket.Object('test.txt')
        for i in range(10):
            obj.put(Body="Version %s" % i)

        # Delete all the versions of the object
        bucket.object_versions.all().delete()

        versions = list(bucket.object_versions.all())
        self.assertEqual(len(versions), 0)
class TestS3Transfers(unittest.TestCase):
    """Tests for the high level boto3.s3.transfer module."""

    def setUp(self):
        self.region = _DEFAULT_REGION
        self.bucket_name = _SHARED_BUCKET
        clear_out_bucket(self.bucket_name, self.region)
        self.session = boto3.session.Session(region_name=self.region)
        self.client = self.session.client('s3', self.region)
        self.files = FileCreator()
        self.progress = 0

    def tearDown(self):
        self.files.remove_all()

    def delete_object(self, key):
        """Delete ``key`` from the shared bucket (used as a cleanup)."""
        self.client.delete_object(
            Bucket=self.bucket_name,
            Key=key)

    def object_exists(self, key):
        """Run the ``object_exists`` waiter for ``key`` and return ``True``.

        The waiter is expected to raise if the object never shows up, so
        this only returns once the wait has succeeded.
        """
        waiter = self.client.get_waiter('object_exists')
        waiter.wait(Bucket=self.bucket_name, Key=key)
        return True

    def wait_until_object_exists(self, key_name, extra_params=None,
                                 min_successes=3):
        """Run the ``object_exists`` waiter ``min_successes`` times in a row.

        :param key_name: Key to wait for in the shared bucket.
        :param extra_params: Optional extra keyword arguments for the
            waiter (e.g. SSE-C parameters).
        :param min_successes: Number of consecutive successful waits.
        """
        waiter = self.client.get_waiter('object_exists')
        params = {'Bucket': self.bucket_name, 'Key': key_name}
        if extra_params is not None:
            params.update(extra_params)
        for _ in range(min_successes):
            waiter.wait(**params)

    def create_s3_transfer(self, config=None):
        """Return an ``S3Transfer`` bound to this test's client."""
        return boto3.s3.transfer.S3Transfer(self.client,
                                            config=config)

    def assert_has_public_read_acl(self, response):
        """Assert a ``get_object_acl`` response grants READ to AllUsers."""
        grants = response['Grants']
        public_read = [g['Grantee'].get('URI', '') for g in grants
                       if g['Permission'] == 'READ']
        # Check every READ grant and fail with a readable message instead
        # of an IndexError when there is no READ grant at all.
        self.assertTrue(
            any('groups/global/AllUsers' in uri for uri in public_read),
            "No public READ grant found in grants: %r" % (grants,))

    def test_copy(self):
        self.client.put_object(
            Bucket=self.bucket_name, Key='foo', Body='beach')
        self.addCleanup(self.delete_object, 'foo')

        self.client.copy(
            CopySource={'Bucket': self.bucket_name, 'Key': 'foo'},
            Bucket=self.bucket_name, Key='bar'
        )
        self.addCleanup(self.delete_object, 'bar')

        self.object_exists('bar')

    def test_upload_fileobj(self):
        fileobj = six.BytesIO(b'foo')
        self.client.upload_fileobj(
            Fileobj=fileobj, Bucket=self.bucket_name, Key='foo')
        self.addCleanup(self.delete_object, 'foo')

        self.object_exists('foo')

    def test_upload_fileobj_progress(self):
        # This has to be an integration test because the fileobj will never
        # actually be read from when using the stubber and therefore the
        # progress callbacks will not be invoked.
        chunksize = 5 * (1024 ** 2)
        config = boto3.s3.transfer.TransferConfig(
            multipart_chunksize=chunksize,
            multipart_threshold=chunksize,
            max_concurrency=1
        )
        fileobj = six.BytesIO(b'0' * (chunksize * 3))

        def progress_callback(amount):
            self.progress += amount

        self.client.upload_fileobj(
            Fileobj=fileobj, Bucket=self.bucket_name, Key='foo',
            Config=config, Callback=progress_callback)
        self.addCleanup(self.delete_object, 'foo')

        self.object_exists('foo')
        self.assertEqual(self.progress, chunksize * 3)

    def test_download_fileobj(self):
        fileobj = six.BytesIO()
        self.client.put_object(
            Bucket=self.bucket_name, Key='foo', Body=b'beach')
        self.addCleanup(self.delete_object, 'foo')

        self.wait_until_object_exists('foo')
        self.client.download_fileobj(
            Bucket=self.bucket_name, Key='foo', Fileobj=fileobj)

        self.assertEqual(fileobj.getvalue(), b'beach')

    def test_upload_below_threshold(self):
        config = boto3.s3.transfer.TransferConfig(
            multipart_threshold=2 * 1024 * 1024)
        transfer = self.create_s3_transfer(config)
        filename = self.files.create_file_with_size(
            'foo.txt', filesize=1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             'foo.txt')
        self.addCleanup(self.delete_object, 'foo.txt')

        self.assertTrue(self.object_exists('foo.txt'))

    def test_upload_above_threshold(self):
        config = boto3.s3.transfer.TransferConfig(
            multipart_threshold=2 * 1024 * 1024)
        transfer = self.create_s3_transfer(config)
        filename = self.files.create_file_with_size(
            '20mb.txt', filesize=20 * 1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             '20mb.txt')
        self.addCleanup(self.delete_object, '20mb.txt')
        self.assertTrue(self.object_exists('20mb.txt'))

    def test_upload_file_above_threshold_with_acl(self):
        config = boto3.s3.transfer.TransferConfig(
            multipart_threshold=5 * 1024 * 1024)
        transfer = self.create_s3_transfer(config)
        filename = self.files.create_file_with_size(
            '6mb.txt', filesize=6 * 1024 * 1024)
        extra_args = {'ACL': 'public-read'}
        transfer.upload_file(filename, self.bucket_name,
                             '6mb.txt', extra_args=extra_args)
        self.addCleanup(self.delete_object, '6mb.txt')

        self.assertTrue(self.object_exists('6mb.txt'))
        response = self.client.get_object_acl(
            Bucket=self.bucket_name, Key='6mb.txt')
        self.assert_has_public_read_acl(response)

    def test_upload_file_above_threshold_with_ssec(self):
        key_bytes = os.urandom(32)
        extra_args = {
            'SSECustomerKey': key_bytes,
            'SSECustomerAlgorithm': 'AES256',
        }
        config = boto3.s3.transfer.TransferConfig(
            multipart_threshold=5 * 1024 * 1024)
        transfer = self.create_s3_transfer(config)
        filename = self.files.create_file_with_size(
            '6mb.txt', filesize=6 * 1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             '6mb.txt', extra_args=extra_args)
        self.addCleanup(self.delete_object, '6mb.txt')
        # A head object will fail if it has a customer key
        # associated with it and it's not provided in the HeadObject
        # request so we can use this to verify our functionality.
        response = self.client.head_object(
            Bucket=self.bucket_name,
            Key='6mb.txt', **extra_args)
        self.assertEqual(response['SSECustomerAlgorithm'], 'AES256')

    def test_progress_callback_on_upload(self):
        self.amount_seen = 0
        lock = threading.Lock()

        def progress_callback(amount):
            with lock:
                self.amount_seen += amount

        transfer = self.create_s3_transfer()
        filename = self.files.create_file_with_size(
            '20mb.txt', filesize=20 * 1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             '20mb.txt', callback=progress_callback)
        self.addCleanup(self.delete_object, '20mb.txt')

        # The callback should have been called enough times such that
        # the total amount of bytes we've seen (via the "amount"
        # arg to the callback function) should be the size
        # of the file we uploaded.
        self.assertEqual(self.amount_seen, 20 * 1024 * 1024)

    def test_callback_called_once_with_sigv4(self):
        # Verify #98, where the callback was being invoked
        # twice when using signature version 4.
        self.amount_seen = 0
        lock = threading.Lock()

        def progress_callback(amount):
            with lock:
                self.amount_seen += amount

        client = self.session.client(
            's3', self.region,
            config=Config(signature_version='s3v4'))
        transfer = boto3.s3.transfer.S3Transfer(client)
        filename = self.files.create_file_with_size(
            '10mb.txt', filesize=10 * 1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             '10mb.txt', callback=progress_callback)
        self.addCleanup(self.delete_object, '10mb.txt')

        self.assertEqual(self.amount_seen, 10 * 1024 * 1024)

    def test_can_send_extra_params_on_upload(self):
        transfer = self.create_s3_transfer()
        filename = self.files.create_file_with_size('foo.txt', filesize=1024)
        transfer.upload_file(filename, self.bucket_name,
                             'foo.txt', extra_args={'ACL': 'public-read'})
        self.addCleanup(self.delete_object, 'foo.txt')

        response = self.client.get_object_acl(
            Bucket=self.bucket_name, Key='foo.txt')
        self.assert_has_public_read_acl(response)

    def test_can_configure_threshold(self):
        config = boto3.s3.transfer.TransferConfig(
            multipart_threshold=6 * 1024 * 1024
        )
        transfer = self.create_s3_transfer(config)
        filename = self.files.create_file_with_size(
            'foo.txt', filesize=8 * 1024 * 1024)
        transfer.upload_file(filename, self.bucket_name,
                             'foo.txt')
        self.addCleanup(self.delete_object, 'foo.txt')

        self.assertTrue(self.object_exists('foo.txt'))

    def test_can_send_extra_params_on_download(self):
        # We're picking the customer provided sse feature
        # of S3 to test the extra_args functionality of
        # S3.
        key_bytes = os.urandom(32)
        extra_args = {
            'SSECustomerKey': key_bytes,
            'SSECustomerAlgorithm': 'AES256',
        }
        self.client.put_object(Bucket=self.bucket_name,
                               Key='foo.txt',
                               Body=b'hello world',
                               **extra_args)
        self.addCleanup(self.delete_object, 'foo.txt')
        transfer = self.create_s3_transfer()

        download_path = os.path.join(self.files.rootdir, 'downloaded.txt')
        self.wait_until_object_exists('foo.txt', extra_params=extra_args)
        transfer.download_file(self.bucket_name, 'foo.txt',
                               download_path, extra_args=extra_args)
        with open(download_path, 'rb') as f:
            self.assertEqual(f.read(), b'hello world')

    def test_progress_callback_on_download(self):
        self.amount_seen = 0
        lock = threading.Lock()

        def progress_callback(amount):
            with lock:
                self.amount_seen += amount

        transfer = self.create_s3_transfer()
        filename = self.files.create_file_with_size(
            '20mb.txt', filesize=20 * 1024 * 1024)
        with open(filename, 'rb') as f:
            self.client.put_object(Bucket=self.bucket_name,
                                   Key='20mb.txt', Body=f)
        self.addCleanup(self.delete_object, '20mb.txt')

        download_path = os.path.join(self.files.rootdir, 'downloaded.txt')
        transfer.download_file(self.bucket_name, '20mb.txt',
                               download_path, callback=progress_callback)

        self.assertEqual(self.amount_seen, 20 * 1024 * 1024)

    def test_download_below_threshold(self):
        transfer = self.create_s3_transfer()

        filename = self.files.create_file_with_size(
            'foo.txt', filesize=1024 * 1024)
        with open(filename, 'rb') as f:
            self.client.put_object(Bucket=self.bucket_name,
                                   Key='foo.txt',
                                   Body=f)
        self.addCleanup(self.delete_object, 'foo.txt')

        download_path = os.path.join(self.files.rootdir, 'downloaded.txt')
        self.wait_until_object_exists('foo.txt')
        transfer.download_file(self.bucket_name, 'foo.txt',
                               download_path)
        assert_files_equal(filename, download_path)

    def test_download_above_threshold(self):
        transfer = self.create_s3_transfer()

        filename = self.files.create_file_with_size(
            'foo.txt', filesize=20 * 1024 * 1024)
        with open(filename, 'rb') as f:
            self.client.put_object(Bucket=self.bucket_name,
                                   Key='foo.txt',
                                   Body=f)
        self.addCleanup(self.delete_object, 'foo.txt')

        download_path = os.path.join(self.files.rootdir, 'downloaded.txt')
        self.wait_until_object_exists('foo.txt')
        transfer.download_file(self.bucket_name, 'foo.txt',
                               download_path)
        assert_files_equal(filename, download_path)

    def test_download_file_with_directory_not_exist(self):
        transfer = self.create_s3_transfer()
        self.client.put_object(
            Bucket=self.bucket_name,
            Key='foo.txt',
            Body=b'foo'
        )
        self.addCleanup(self.delete_object, 'foo.txt')
        download_path = os.path.join(self.files.rootdir, 'a', 'b', 'c',
                                     'downloaded.txt')
        self.wait_until_object_exists('foo.txt')
        with self.assertRaises(IOError):
            transfer.download_file(self.bucket_name, 'foo.txt', download_path)

    def test_download_large_file_directory_not_exist(self):
        transfer = self.create_s3_transfer()

        filename = self.files.create_file_with_size(
            'foo.txt', filesize=20 * 1024 * 1024)
        with open(filename, 'rb') as f:
            self.client.put_object(Bucket=self.bucket_name,
                                   Key='foo.txt',
                                   Body=f)
        self.addCleanup(self.delete_object, 'foo.txt')
        download_path = os.path.join(self.files.rootdir, 'a', 'b', 'c',
                                     'downloaded.txt')
        self.wait_until_object_exists('foo.txt')
        with self.assertRaises(IOError):
            transfer.download_file(self.bucket_name, 'foo.txt', download_path)

    def test_transfer_methods_through_client(self):
        # This is really just a sanity check to ensure that the interface
        # from the clients work.  We're not exhaustively testing through
        # this client interface.
        filename = self.files.create_file_with_size(
            'foo.txt', filesize=1024 * 1024)
        self.client.upload_file(Filename=filename,
                                Bucket=self.bucket_name,
                                Key='foo.txt')
        self.addCleanup(self.delete_object, 'foo.txt')

        download_path = os.path.join(self.files.rootdir, 'downloaded.txt')
        self.wait_until_object_exists('foo.txt')
        self.client.download_file(Bucket=self.bucket_name,
                                  Key='foo.txt',
                                  Filename=download_path)
        assert_files_equal(filename, download_path)

    def test_transfer_methods_do_not_use_threads(self):
        # This is just a smoke test to make sure that
        # setting use_threads to False has no issues transferring files as
        # the non-threaded implementation is ran under the same integration
        # and functional tests in s3transfer as the normal threaded
        # implementation
        #
        # The methods used are arbitrary other than one of the methods
        # use ``boto3.s3.transfer.S3Transfer`` and the other should be
        # using ``s3transfer.manager.TransferManager`` directly
        content = b'my content'
        filename = self.files.create_file('myfile', content.decode('utf-8'))
        key = 'foo'
        config = boto3.s3.transfer.TransferConfig(use_threads=False)

        self.client.upload_file(
            Bucket=self.bucket_name, Key=key, Filename=filename,
            Config=config)
        self.addCleanup(self.delete_object, key)
        self.assertTrue(self.object_exists(key))

        fileobj = six.BytesIO()
        # Download the same key that was uploaded above.
        self.client.download_fileobj(
            Bucket=self.bucket_name, Key=key, Fileobj=fileobj, Config=config)
        self.assertEqual(fileobj.getvalue(), content)

    def test_transfer_methods_through_bucket(self):
        # This is just a sanity check to ensure that the bucket interface work.
        key = 'bucket.txt'
        bucket = self.session.resource('s3').Bucket(self.bucket_name)
        filename = self.files.create_file_with_size(key, 1024 * 1024)
        bucket.upload_file(Filename=filename, Key=key)
        self.addCleanup(self.delete_object, key)
        download_path = os.path.join(self.files.rootdir, unique_id('foo'))
        bucket.download_file(Key=key, Filename=download_path)
        assert_files_equal(filename, download_path)

    def test_transfer_methods_through_object(self):
        # This is just a sanity check to ensure that the object interface work.
        key = 'object.txt'
        obj = self.session.resource('s3').Object(self.bucket_name, key)
        filename = self.files.create_file_with_size(key, 1024 * 1024)
        obj.upload_file(Filename=filename)
        self.addCleanup(self.delete_object, key)
        download_path = os.path.join(self.files.rootdir, unique_id('foo'))
        obj.download_file(Filename=download_path)
        assert_files_equal(filename, download_path)
class TestCustomS3BucketLoad(unittest.TestCase):
    """Integration check of the ``creation_date`` attribute on a Bucket."""

    def setUp(self):
        self.region, self.bucket_name = _DEFAULT_REGION, _SHARED_BUCKET
        # Start from an empty shared bucket.
        clear_out_bucket(self.bucket_name, self.region)
        session = boto3.session.Session(region_name=self.region)
        self.session = session
        self.s3 = session.resource('s3')

    def test_can_access_buckets_creation_date(self):
        shared_bucket = self.s3.Bucket(self.bucket_name)
        created = shared_bucket.creation_date
        self.assertIsInstance(created, datetime.datetime)