#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pytype: skip-file

import logging
import math
import time

from apache_beam.metrics.metric import Metrics
from apitools.base.py import exceptions
from apitools.base.py import http_wrapper
from apitools.base.py import util

_LOGGER = logging.getLogger(__name__)


class GcsIOOverrides(object):
    """Overrides applied to the Google Cloud Storage I/O client."""

    # Counter accumulating the (rounded-up) seconds slept because of
    # throttling.
    _THROTTLED_SECS = Metrics.counter('StorageV1', "cumulativeThrottlingSeconds")

    @classmethod
    def retry_func(cls, retry_args):
        """Retry callback that backs off on GCS throttling errors.

        When ``retry_args.exc`` is a ``BadStatusCodeError`` whose status code
        equals ``http_wrapper.TOO_MANY_REQUESTS``, the HTTP connections are
        rebuilt, the wait time is computed from the retry count, the
        throttling counter is incremented by that wait rounded up to a whole
        second, and the call sleeps for the wait time.

        Every other exception is delegated to
        ``http_wrapper.HandleExceptionsAndRebuildHttpConnections``.

        Args:
            retry_args: Retry state object; this method reads its ``exc``,
                ``http``, ``http_request``, ``num_retries`` and
                ``max_retry_wait`` attributes.

        Returns:
            The delegate's return value for non-throttling errors; ``None``
            after sleeping on a throttling error.
        """
        error = retry_args.exc
        # handling GCS download throttling errors (BEAM-7424)
        is_throttled = (
            isinstance(error, exceptions.BadStatusCodeError) and
            error.status_code == http_wrapper.TOO_MANY_REQUESTS)
        if not is_throttled:
            return http_wrapper.HandleExceptionsAndRebuildHttpConnections(
                retry_args)

        _LOGGER.debug(
            'Caught GCS quota error (%s), retrying.', error.status_code)
        http_wrapper.RebuildHttpConnections(retry_args.http)
        _LOGGER.debug(
            'Retrying request to url %s after exception %s',
            retry_args.http_request.url,
            error)

        backoff_secs = util.CalculateWaitForRetry(
            retry_args.num_retries, max_wait=retry_args.max_retry_wait)
        # The counter takes whole seconds, so round the wait up before
        # recording it; the sleep itself uses the unrounded value.
        cls._THROTTLED_SECS.inc(math.ceil(backoff_secs))
        time.sleep(backoff_secs)