github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/third_party/cloudstorage/api_utils.py (about)

     1  # Copyright 2013 Google Inc. All Rights Reserved.
     2  #
     3  # Licensed under the Apache License, Version 2.0 (the "License");
     4  # you may not use this file except in compliance with the License.
     5  # You may obtain a copy of the License at
     6  #
     7  #    http://www.apache.org/licenses/LICENSE-2.0
     8  #
     9  # Unless required by applicable law or agreed to in writing,
    10  # software distributed under the License is distributed on an
    11  # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
    12  # either express or implied. See the License for the specific
    13  # language governing permissions and limitations under the License.
    14  
    15  """Util functions and classes for cloudstorage_api."""
    16  
    17  
    18  
# Names exported by `from <module> import *`. Every other module-level
# function and class in this file is underscore-prefixed.
__all__ = ['set_default_retry_params',
           'RetryParams',
          ]
    22  
    23  import copy
    24  import httplib
    25  import logging
    26  import math
    27  import os
    28  import threading
    29  import time
    30  import urllib
    31  
    32  
    33  try:
    34    from google.appengine.api import app_identity
    35    from google.appengine.api import urlfetch
    36    from google.appengine.api import urlfetch_errors
    37    from google.appengine.datastore import datastore_rpc
    38    from google.appengine.ext import ndb
    39    from google.appengine.ext.ndb import eventloop
    40    from google.appengine.ext.ndb import tasklets
    41    from google.appengine.ext.ndb import utils
    42    from google.appengine import runtime
    43    from google.appengine.runtime import apiproxy_errors
    44  except ImportError:
    45    from google.appengine.api import app_identity
    46    from google.appengine.api import urlfetch
    47    from google.appengine.api import urlfetch_errors
    48    from google.appengine.datastore import datastore_rpc
    49    from google.appengine import runtime
    50    from google.appengine.runtime import apiproxy_errors
    51    from google.appengine.ext import ndb
    52    from google.appengine.ext.ndb import eventloop
    53    from google.appengine.ext.ndb import tasklets
    54    from google.appengine.ext.ndb import utils
    55  
    56  
# Exception classes that _RetryWrapper retries on by default; this tuple is
# the default value of its retriable_exceptions argument.
_RETRIABLE_EXCEPTIONS = (urlfetch.DownloadError,
                         urlfetch_errors.InternalTransientError,
                         apiproxy_errors.Error,
                         app_identity.InternalError,
                         app_identity.BackendDeadlineExceeded)

# Thread-local holder for the default RetryParams. The assignment below only
# runs in the thread that imports this module; _get_default_retry_params
# reads the attribute through getattr with a None fallback, so threads that
# never set a default also observe None.
_thread_local_settings = threading.local()
_thread_local_settings.default_retry_params = None
    65  
    66  
    67  def set_default_retry_params(retry_params):
    68    """Set a default RetryParams for current thread current request."""
    69    _thread_local_settings.default_retry_params = copy.copy(retry_params)
    70  
    71  
    72  def _get_default_retry_params():
    73    """Get default RetryParams for current request and current thread.
    74  
    75    Returns:
    76      A new instance of the default RetryParams.
    77    """
    78    default = getattr(_thread_local_settings, 'default_retry_params', None)
    79    if default is None or not default.belong_to_current_request():
    80      return RetryParams()
    81    else:
    82      return copy.copy(default)
    83  
    84  
    85  def _quote_filename(filename):
    86    """Quotes filename to use as a valid URI path.
    87  
    88    Args:
    89      filename: user provided filename. /bucket/filename.
    90  
    91    Returns:
    92      The filename properly quoted to use as URI's path component.
    93    """
    94    return urllib.quote(filename)
    95  
    96  
    97  def _unquote_filename(filename):
    98    """Unquotes a valid URI path back to its filename.
    99  
   100    This is the opposite of _quote_filename.
   101  
   102    Args:
   103      filename: a quoted filename. /bucket/some%20filename.
   104  
   105    Returns:
   106      The filename unquoted.
   107    """
   108    return urllib.unquote(filename)
   109  
   110  
   111  def _should_retry(resp):
   112    """Given a urlfetch response, decide whether to retry that request."""
   113    return (resp.status_code == httplib.REQUEST_TIMEOUT or
   114            (resp.status_code >= 500 and
   115             resp.status_code < 600))
   116  
   117  
   118  class _RetryWrapper(object):
   119    """A wrapper that wraps retry logic around any tasklet."""
   120  
   121    def __init__(self,
   122                 retry_params,
   123                 retriable_exceptions=_RETRIABLE_EXCEPTIONS,
   124                 should_retry=lambda r: False):
   125      """Init.
   126  
   127      Args:
   128        retry_params: an RetryParams instance.
   129        retriable_exceptions: a list of exception classes that are retriable.
   130        should_retry: a function that takes a result from the tasklet and returns
   131          a boolean. True if the result should be retried.
   132      """
   133      self.retry_params = retry_params
   134      self.retriable_exceptions = retriable_exceptions
   135      self.should_retry = should_retry
   136  
   137    @ndb.tasklet
   138    def run(self, tasklet, **kwds):
   139      """Run a tasklet with retry.
   140  
   141      The retry should be transparent to the caller: if no results
   142      are successful, the exception or result from the last retry is returned
   143      to the caller.
   144  
   145      Args:
   146        tasklet: the tasklet to run.
   147        **kwds: keywords arguments to run the tasklet.
   148  
   149      Raises:
   150        The exception from running the tasklet.
   151  
   152      Returns:
   153        The result from running the tasklet.
   154      """
   155      start_time = time.time()
   156      n = 1
   157  
   158      while True:
   159        e = None
   160        result = None
   161        got_result = False
   162  
   163        try:
   164          result = yield tasklet(**kwds)
   165          got_result = True
   166          if not self.should_retry(result):
   167            raise ndb.Return(result)
   168        except runtime.DeadlineExceededError:
   169          logging.debug(
   170              'Tasklet has exceeded request deadline after %s seconds total',
   171              time.time() - start_time)
   172          raise
   173        except self.retriable_exceptions, e:
   174          pass
   175  
   176        if n == 1:
   177          logging.debug('Tasklet is %r', tasklet)
   178  
   179        delay = self.retry_params.delay(n, start_time)
   180  
   181        if delay <= 0:
   182          logging.debug(
   183              'Tasklet failed after %s attempts and %s seconds in total',
   184              n, time.time() - start_time)
   185          if got_result:
   186            raise ndb.Return(result)
   187          elif e is not None:
   188            raise e
   189          else:
   190            assert False, 'Should never reach here.'
   191  
   192        if got_result:
   193          logging.debug(
   194              'Got result %r from tasklet.', result)
   195        else:
   196          logging.debug(
   197              'Got exception "%r" from tasklet.', e)
   198        logging.debug('Retry in %s seconds.', delay)
   199        n += 1
   200        yield tasklets.sleep(delay)
   201  
   202  
   203  class RetryParams(object):
   204    """Retry configuration parameters."""
   205  
   206    _DEFAULT_USER_AGENT = 'App Engine Python GCS Client'
   207  
   208    @datastore_rpc._positional(1)
   209    def __init__(self,
   210                 backoff_factor=2.0,
   211                 initial_delay=0.1,
   212                 max_delay=10.0,
   213                 min_retries=3,
   214                 max_retries=6,
   215                 max_retry_period=30.0,
   216                 urlfetch_timeout=None,
   217                 save_access_token=False,
   218                 _user_agent=None):
   219      """Init.
   220  
   221      This object is unique per request per thread.
   222  
   223      Library will retry according to this setting when App Engine Server
   224      can't call urlfetch, urlfetch timed out, or urlfetch got a 408 or
   225      500-600 response.
   226  
   227      Args:
   228        backoff_factor: exponential backoff multiplier.
   229        initial_delay: seconds to delay for the first retry.
   230        max_delay: max seconds to delay for every retry.
   231        min_retries: min number of times to retry. This value is automatically
   232          capped by max_retries.
   233        max_retries: max number of times to retry. Set this to 0 for no retry.
   234        max_retry_period: max total seconds spent on retry. Retry stops when
   235          this period passed AND min_retries has been attempted.
   236        urlfetch_timeout: timeout for urlfetch in seconds. Could be None,
   237          in which case the value will be chosen by urlfetch module.
   238        save_access_token: persist access token to datastore to avoid
   239          excessive usage of GetAccessToken API. Usually the token is cached
   240          in process and in memcache. In some cases, memcache isn't very
   241          reliable.
   242        _user_agent: The user agent string that you want to use in your requests.
   243      """
   244      self.backoff_factor = self._check('backoff_factor', backoff_factor)
   245      self.initial_delay = self._check('initial_delay', initial_delay)
   246      self.max_delay = self._check('max_delay', max_delay)
   247      self.max_retry_period = self._check('max_retry_period', max_retry_period)
   248      self.max_retries = self._check('max_retries', max_retries, True, int)
   249      self.min_retries = self._check('min_retries', min_retries, True, int)
   250      if self.min_retries > self.max_retries:
   251        self.min_retries = self.max_retries
   252  
   253      self.urlfetch_timeout = None
   254      if urlfetch_timeout is not None:
   255        self.urlfetch_timeout = self._check('urlfetch_timeout', urlfetch_timeout)
   256      self.save_access_token = self._check('save_access_token', save_access_token,
   257                                           True, bool)
   258      self._user_agent = _user_agent or self._DEFAULT_USER_AGENT
   259  
   260      self._request_id = os.getenv('REQUEST_LOG_ID')
   261  
   262    def __eq__(self, other):
   263      if not isinstance(other, self.__class__):
   264        return False
   265      return self.__dict__ == other.__dict__
   266  
   267    def __ne__(self, other):
   268      return not self.__eq__(other)
   269  
   270    @classmethod
   271    def _check(cls, name, val, can_be_zero=False, val_type=float):
   272      """Check init arguments.
   273  
   274      Args:
   275        name: name of the argument. For logging purpose.
   276        val: value. Value has to be non negative number.
   277        can_be_zero: whether value can be zero.
   278        val_type: Python type of the value.
   279  
   280      Returns:
   281        The value.
   282  
   283      Raises:
   284        ValueError: when invalid value is passed in.
   285        TypeError: when invalid value type is passed in.
   286      """
   287      valid_types = [val_type]
   288      if val_type is float:
   289        valid_types.append(int)
   290  
   291      if type(val) not in valid_types:
   292        raise TypeError(
   293            'Expect type %s for parameter %s' % (val_type.__name__, name))
   294      if val < 0:
   295        raise ValueError(
   296            'Value for parameter %s has to be greater than 0' % name)
   297      if not can_be_zero and val == 0:
   298        raise ValueError(
   299            'Value for parameter %s can not be 0' % name)
   300      return val
   301  
   302    def belong_to_current_request(self):
   303      return os.getenv('REQUEST_LOG_ID') == self._request_id
   304  
   305    def delay(self, n, start_time):
   306      """Calculate delay before the next retry.
   307  
   308      Args:
   309        n: the number of current attempt. The first attempt should be 1.
   310        start_time: the time when retry started in unix time.
   311  
   312      Returns:
   313        Number of seconds to wait before next retry. -1 if retry should give up.
   314      """
   315      if (n > self.max_retries or
   316          (n > self.min_retries and
   317           time.time() - start_time > self.max_retry_period)):
   318        return -1
   319      return min(
   320          math.pow(self.backoff_factor, n-1) * self.initial_delay,
   321          self.max_delay)
   322  
   323  
   324  def _run_until_rpc():
   325    """Eagerly evaluate tasklets until it is blocking on some RPC.
   326  
   327    Usually ndb eventloop el isn't run until some code calls future.get_result().
   328  
   329    When an async tasklet is called, the tasklet wrapper evaluates the tasklet
   330    code into a generator, enqueues a callback _help_tasklet_along onto
   331    the el.current queue, and returns a future.
   332  
   333    _help_tasklet_along, when called by the el, will
   334    get one yielded value from the generator. If the value if another future,
   335    set up a callback _on_future_complete to invoke _help_tasklet_along
   336    when the dependent future fulfills. If the value if a RPC, set up a
   337    callback _on_rpc_complete to invoke _help_tasklet_along when the RPC fulfills.
   338    Thus _help_tasklet_along drills down
   339    the chain of futures until some future is blocked by RPC. El runs
   340    all callbacks and constantly check pending RPC status.
   341    """
   342    el = eventloop.get_event_loop()
   343    while el.current:
   344      el.run0()
   345  
   346  
   347  def _eager_tasklet(tasklet):
   348    """Decorator to turn tasklet to run eagerly."""
   349  
   350    @utils.wrapping(tasklet)
   351    def eager_wrapper(*args, **kwds):
   352      fut = tasklet(*args, **kwds)
   353      _run_until_rpc()
   354      return fut
   355  
   356    return eager_wrapper