python-botocore/botocore/handlers.py

# Copyright 2012-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

"""Builtin event handlers.

This module contains builtin handlers for events emitted by botocore.
"""

import base64
import hashlib
import logging
import re
import xml.etree.cElementTree

from botocore.compat import urlsplit, urlunsplit, unquote, json, quote, six
from botocore import retryhandler
from botocore import utils
from botocore import translate
import botocore.auth


logger = logging.getLogger(__name__)
LABEL_RE = re.compile('[a-z0-9][a-z0-9\-]*[a-z0-9]')
RESTRICTED_REGIONS = [
    'us-gov-west-1',
    'fips-us-gov-west-1',
]
REGISTER_FIRST = object()
REGISTER_LAST = object()


def check_for_200_error(response, **kwargs):
    # From: http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
    # There are two opportunities for a copy request to return an error. One
    # can occur when Amazon S3 receives the copy request and the other can
    # occur while Amazon S3 is copying the files. If the error occurs before
    # the copy operation starts, you receive a standard Amazon S3 error. If the
    # error occurs during the copy operation, the error response is embedded in
    # the 200 OK response. This means that a 200 OK response can contain either
    # a success or an error. Make sure to design your application to parse the
    # contents of the response and handle it appropriately.
    #
    # So this handler checks for this case.  Even though the server sends a
    # 200 response, conceptually this should be handled exactly like a
    # 500 response (with respect to raising exceptions, retries, etc.)
    # We're connected *before* all the other retry logic handlers, so as long
    # as we switch the error code to 500, we'll retry the error as expected.
    if response is None:
        # A None response can happen if an exception is raised while
        # trying to retrieve the response.  See Endpoint._get_response().
        return
    http_response, parsed = response
    if _looks_like_special_case_error(http_response):
        logger.debug("Error found for response with 200 status code, "
                        "errors: %s, changing status code to "
                        "500.", parsed)
        http_response.status_code = 500


def _looks_like_special_case_error(http_response):
    if http_response.status_code == 200:
        parser = xml.etree.cElementTree.XMLParser(
            target=xml.etree.cElementTree.TreeBuilder(),
            encoding='utf-8')
        parser.feed(http_response.content)
        root = parser.close()
        if root.tag == 'Error':
            return True
    return False


def decode_console_output(parsed, **kwargs):
    try:
        value = base64.b64decode(six.b(parsed['Output'])).decode('utf-8')
        parsed['Output'] = value
    except (ValueError, TypeError, AttributeError):
        logger.debug('Error decoding base64', exc_info=True)


def decode_quoted_jsondoc(value):
    try:
        value = json.loads(unquote(value))
    except (ValueError, TypeError):
        logger.debug('Error loading quoted JSON', exc_info=True)
    return value


def json_decode_template_body(parsed, **kwargs):
    if 'TemplateBody' in parsed:
        try:
            value = json.loads(parsed['TemplateBody'])
            parsed['TemplateBody'] = value
        except (ValueError, TypeError):
            logger.debug('error loading JSON', exc_info=True)


def calculate_md5(params, **kwargs):
    request_dict = params
    if request_dict['body'] and not 'Content-MD5' in params['headers']:
        md5 = hashlib.md5()
        md5.update(six.b(params['body']))
        value = base64.b64encode(md5.digest()).decode('utf-8')
        params['headers']['Content-MD5'] = value


def sse_md5(params, **kwargs):
    """
    S3 server-side encryption requires the encryption key to be sent to the
    server base64 encoded, as well as a base64-encoded MD5 hash of the
    encryption key. This handler does both if the MD5 has not been set by
    the caller.
    """
    if not _needs_s3_sse_customization(params):
        return
    key_as_bytes = params['SSECustomerKey']
    if isinstance(key_as_bytes, six.text_type):
        key_as_bytes = key_as_bytes.encode('utf-8')
    key_md5_str = base64.b64encode(
        hashlib.md5(key_as_bytes).digest()).decode('utf-8')
    key_b64_encoded = base64.b64encode(key_as_bytes).decode('utf-8')
    params['SSECustomerKey'] = key_b64_encoded
    params['SSECustomerKeyMD5'] = key_md5_str


def _needs_s3_sse_customization(params):
    return (params.get('SSECustomerKey') is not None and
            'SSECustomerKeyMD5' not in params)


def check_dns_name(bucket_name):
    """
    Check to see if the ``bucket_name`` complies with the
    restricted DNS naming conventions necessary to allow
    access via virtual-hosting style.

    Even though "." characters are perfectly valid in this DNS
    naming scheme, we are going to punt on any name containing a
    "." character because these will cause SSL cert validation
    problems if we try to use virtual-hosting style addressing.
    """
    if '.' in bucket_name:
        return False
    n = len(bucket_name)
    if n < 3 or n > 63:
        # Wrong length
        return False
    if n == 1:
        if not bucket_name.isalnum():
            return False
    match = LABEL_RE.match(bucket_name)
    if match is None or match.end() != len(bucket_name):
        return False
    return True


def fix_s3_host(event_name, endpoint, request, auth, **kwargs):
    """
    This handler looks at S3 requests just before they are signed.
    If there is a bucket name on the path (true for everything except
    ListAllBuckets) it checks to see if that bucket name conforms to
    the DNS naming conventions.  If it does, it alters the request to
    use ``virtual hosting`` style addressing rather than ``path-style``
    addressing.  This allows us to avoid 301 redirects for all
    bucket names that can be CNAME'd.
    """
    if request.auth_path is not None:
        # The auth_path has already been applied (this may be a
        # retried request).  We don't need to perform this
        # customization again.
        return
    elif _is_get_bucket_location_request(request):
        # For the GetBucketLocation response, we should not be using
        # the virtual host style addressing so we can avoid any sigv4
        # issues.
        logger.debug("Request is GetBucketLocation operation, not checking "
                     "for DNS compatibility.")
        return
    parts = urlsplit(request.url)
    request.auth_path = parts.path
    path_parts = parts.path.split('/')
    if isinstance(auth, botocore.auth.SigV4Auth):
        return
    if len(path_parts) > 1:
        bucket_name = path_parts[1]
        logger.debug('Checking for DNS compatible bucket for: %s',
                     request.url)
        if check_dns_name(bucket_name) and _allowed_region(endpoint.region_name):
            # If the operation is on a bucket, the auth_path must be
            # terminated with a '/' character.
            if len(path_parts) == 2:
                if request.auth_path[-1] != '/':
                    request.auth_path += '/'
            path_parts.remove(bucket_name)
            global_endpoint = 's3.amazonaws.com'
            host = bucket_name + '.' + global_endpoint
            new_tuple = (parts.scheme, host, '/'.join(path_parts),
                         parts.query, '')
            new_uri = urlunsplit(new_tuple)
            request.url = new_uri
            logger.debug('URI updated to: %s', new_uri)
        else:
            logger.debug('Not changing URI, bucket is not DNS compatible: %s',
                         bucket_name)


def _is_get_bucket_location_request(request):
    return request.url.endswith('?location')


def _allowed_region(region_name):
    return region_name not in RESTRICTED_REGIONS


def register_retries_for_service(service_data, session,
                                 service_name, **kwargs):
    loader = session.get_component('data_loader')
    endpoint_prefix = service_data.get('metadata', {}).get('endpointPrefix')
    if endpoint_prefix is None:
        logger.debug("Not registering retry handlers, could not endpoint "
                     "prefix from model for service %s", service_name)
        return
    config = _load_retry_config(loader, endpoint_prefix)
    if not config:
        return
    logger.debug("Registering retry handlers for service: %s", service_name)
    handler = retryhandler.create_retry_handler(
        config, endpoint_prefix)
    unique_id = 'retry-config-%s' % endpoint_prefix
    session.register('needs-retry.%s' % endpoint_prefix,
                     handler, unique_id=unique_id)
    _register_for_operations(config, session,
                             service_name=endpoint_prefix)


def _load_retry_config(loader, endpoint_prefix):
    original_config = loader.load_data('aws/_retry')
    retry_config = translate.build_retry_config(
        endpoint_prefix, original_config['retry'],
        original_config.get('definitions', {}))
    return retry_config


def _register_for_operations(config, session, service_name):
    # There's certainly a tradeoff for registering the retry config
    # for the operations when the service is created.  In practice,
    # there aren't a whole lot of per operation retry configs so
    # this is ok for now.
    for key in config:
        if key == '__default__':
            continue
        handler = retryhandler.create_retry_handler(config, key)
        unique_id = 'retry-config-%s-%s' % (service_name, key)
        session.register('needs-retry.%s.%s' % (service_name, key),
                         handler, unique_id=unique_id)


def signature_overrides(service_data, service_name, session, **kwargs):
    scoped_config = session.get_scoped_config()
    service_config = scoped_config.get(service_name)
    if service_config is None or not isinstance(service_config, dict):
        return
    signature_version_override = service_config.get('signature_version')
    if signature_version_override is not None:
        logger.debug("Switching signature version for service %s "
                     "to version %s based on config file override.",
                     service_name, signature_version_override)
        service_metadata = service_data['metadata']
        service_metadata['signatureVersion'] = signature_version_override


def add_expect_header(model, params, **kwargs):
    if model.http.get('method', '') not in ['PUT', 'POST']:
        return
    if 'body' in params:
        body = params['body']
        if hasattr(body, 'read'):
            # Any file like object will use an expect 100-continue
            # header regardless of size.
            logger.debug("Adding expect 100 continue header to request.")
            params['headers']['Expect'] = '100-continue'


def quote_source_header(params, **kwargs):
    if params['headers'] and 'x-amz-copy-source' in params['headers']:
        value = params['headers']['x-amz-copy-source']
        params['headers']['x-amz-copy-source'] = quote(
            value.encode('utf-8'), '/~')


def copy_snapshot_encrypted(operation, params, endpoint, **kwargs):
    # The presigned URL that facilities copying an encrypted snapshot.
    # If the user does not provide this value, we will automatically
    # calculate on behalf of the user and inject the PresignedUrl
    # into the requests.
    # The params sent in the event don't quite sync up 100% so we're
    # renaming them here until they can be updated in the event.
    request_dict = params
    params = request_dict['body']
    if 'PresignedUrl' in params:
        # If the customer provided this value, then there's nothing for
        # us to do.
        return
    params['DestinationRegion'] = endpoint.region_name
    # The request will be sent to the destination region, so we need
    # to create an endpoint to the source region and create a presigned
    # url based on the source endpoint.
    region = params['SourceRegion']
    source_endpoint = operation.service.get_endpoint(region)
    presigner = botocore.auth.SigV4QueryAuth(
        credentials=source_endpoint.auth.credentials,
        region_name=region,
        service_name='ec2',
        expires=60 * 60)
    signed_request = source_endpoint.create_request(request_dict, presigner)
    params['PresignedUrl'] = signed_request.url


def json_decode_policies(parsed, model, **kwargs):
    # Any time an IAM operation returns a policy document
    # it is a string that is json that has been urlencoded,
    # i.e urlencode(json.dumps(policy_document)).
    # To give users something more useful, we will urldecode
    # this value and json.loads() the result so that they have
    # the policy document as a dictionary.
    output_shape = model.output_shape
    if output_shape is not None:
        _decode_policy_types(parsed, model.output_shape)


def _decode_policy_types(parsed, shape):
    # IAM consistently uses the policyDocumentType shape to indicate
    # strings that have policy documents.
    shape_name = 'policyDocumentType'
    if shape.type_name == 'structure':
        for member_name, member_shape in shape.members.items():
            if member_shape.type_name == 'string' and \
                    member_shape.name == shape_name:
                parsed[member_name] = decode_quoted_jsondoc(parsed[member_name])
            elif member_name in parsed:
                _decode_policy_types(parsed[member_name], member_shape)
    if shape.type_name == 'list':
        shape_member = shape.member
        for item in parsed:
            _decode_policy_types(item, shape_member)


def parse_get_bucket_location(parsed, http_response, **kwargs):
    # s3.GetBucketLocation cannot be modeled properly.  To
    # account for this we just manually parse the XML document.
    # The "parsed" passed in only has the ResponseMetadata
    # filled out.  This handler will fill in the LocationConstraint
    # value.
    response_body = http_response.content
    parser = xml.etree.cElementTree.XMLParser(
        target=xml.etree.cElementTree.TreeBuilder(),
        encoding='utf-8')
    parser.feed(response_body)
    root = parser.close()
    region = root.text
    parsed['LocationConstraint'] = region


def base64_encode_user_data(params, **kwargs):
    if 'UserData' in params:
        if isinstance(params['UserData'], six.text_type):
            # Encode it to bytes if it is text.
            params['UserData'] = params['UserData'].encode('utf-8')
        params['UserData'] = base64.b64encode(
            params['UserData']).decode('utf-8')


def fix_route53_ids(params, model, **kwargs):
    """
    Check for and split apart Route53 resource IDs, setting
    only the last piece. This allows the output of one operation
    (e.g. ``'foo/1234'``) to be used as input in another
    operation (e.g. it expects just ``'1234'``).
    """
    input_shape = model.input_shape
    if not input_shape or not hasattr(input_shape, 'members'):
        return

    members = [name for (name, shape) in input_shape.members.items()
               if shape.name in ['ResourceId', 'DelegationSetId']]

    for name in members:
        if name in params:
            orig_value = params[name]
            params[name] = orig_value.split('/')[-1]
            logger.debug('%s %s -> %s', name, orig_value, params[name])


def inject_account_id(params, **kwargs):
    if params.get('accountId') is None:
        # Glacier requires accountId, but allows you
        # to specify '-' for the current owners account.
        # We add this default value if the user does not
        # provide the accountId as a convenience.
        params['accountId'] = '-'


def add_glacier_version(model, params, **kwargs):
    request_dict = params
    request_dict['headers']['x-amz-glacier-version'] = \
            model.metadata['apiVersion']


def add_glacier_checksums(params, **kwargs):
    """Add glacier checksums to the http request.

    This will add two headers to the http request:

        * x-amz-content-sha256
        * x-amz-sha256-tree-hash

    These values will only be added if they are not present
    in the HTTP request.

    """
    request_dict = params
    headers = request_dict['headers']
    body = request_dict['body']
    if isinstance(body, six.binary_type):
        # If the user provided a bytes type instead of a file
        # like object, we're temporarily create a BytesIO object
        # so we can use the util functions to calculate the
        # checksums which assume file like objects.  Note that
        # we're not actually changing the body in the request_dict.
        body = six.BytesIO(body)
    starting_position = body.tell()
    if 'x-amz-content-sha256' not in headers:
        headers['x-amz-content-sha256'] = utils.calculate_sha256(
            body, as_hex=True)
    body.seek(starting_position)
    if 'x-amz-sha256-tree-hash' not in headers:
        headers['x-amz-sha256-tree-hash'] = utils.calculate_tree_hash(body)
    body.seek(starting_position)


# This is a list of (event_name, handler).
# When a Session is created, everything in this list will be
# automatically registered with that Session.

BUILTIN_HANDLERS = [
    ('after-call.iam', json_decode_policies),

    ('after-call.ec2.GetConsoleOutput', decode_console_output),
    ('after-call.cloudformation.GetTemplate', json_decode_template_body),
    ('after-call.s3.GetBucketLocation', parse_get_bucket_location),

    ('before-call.s3.PutBucketTagging', calculate_md5),
    ('before-call.s3.PutBucketLifecycle', calculate_md5),
    ('before-call.s3.PutBucketCors', calculate_md5),
    ('before-call.s3.DeleteObjects', calculate_md5),
    ('before-call.s3.UploadPartCopy', quote_source_header),
    ('before-call.s3.CopyObject', quote_source_header),
    ('before-call.s3', add_expect_header),
    ('before-call.glacier', add_glacier_version),
    ('before-call.glacier.UploadArchive', add_glacier_checksums),
    ('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
    ('before-call.ec2.CopySnapshot', copy_snapshot_encrypted),
    ('before-auth.s3', fix_s3_host),
    ('needs-retry.s3.UploadPartCopy', check_for_200_error, REGISTER_FIRST),
    ('needs-retry.s3.CopyObject', check_for_200_error, REGISTER_FIRST),
    ('needs-retry.s3.CompleteMultipartUpload', check_for_200_error,
     REGISTER_FIRST),
    ('service-data-loaded', register_retries_for_service),
    ('service-data-loaded', signature_overrides),
    ('before-parameter-build.s3.HeadObject', sse_md5),
    ('before-parameter-build.s3.GetObject', sse_md5),
    ('before-parameter-build.s3.PutObject', sse_md5),
    ('before-parameter-build.s3.CopyObject', sse_md5),
    ('before-parameter-build.s3.CreateMultipartUpload', sse_md5),
    ('before-parameter-build.s3.UploadPart', sse_md5),
    ('before-parameter-build.s3.UploadPartCopy', sse_md5),
    ('before-parameter-build.ec2.RunInstances', base64_encode_user_data),
    ('before-parameter-build.autoscaling.CreateLaunchConfiguration',
     base64_encode_user_data),
    ('before-parameter-build.route53', fix_route53_ids),
    ('before-parameter-build.glacier', inject_account_id),
]