# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

"""Util functions and classes for cloudstorage_api."""



__all__ = ['set_default_retry_params',
           'RetryParams',
          ]

import copy
import httplib
import logging
import math
import os
import threading
import time
import urllib


# NOTE(review): both branches import the same modules (only the order
# differs), so the fallback cannot succeed where the first attempt failed.
# Kept as-is to avoid changing import behavior.
try:
  from google.appengine.api import app_identity
  from google.appengine.api import urlfetch
  from google.appengine.api import urlfetch_errors
  from google.appengine.datastore import datastore_rpc
  from google.appengine.ext import ndb
  from google.appengine.ext.ndb import eventloop
  from google.appengine.ext.ndb import tasklets
  from google.appengine.ext.ndb import utils
  from google.appengine import runtime
  from google.appengine.runtime import apiproxy_errors
except ImportError:
  from google.appengine.api import app_identity
  from google.appengine.api import urlfetch
  from google.appengine.api import urlfetch_errors
  from google.appengine.datastore import datastore_rpc
  from google.appengine import runtime
  from google.appengine.runtime import apiproxy_errors
  from google.appengine.ext import ndb
  from google.appengine.ext.ndb import eventloop
  from google.appengine.ext.ndb import tasklets
  from google.appengine.ext.ndb import utils


# Exceptions that _RetryWrapper retries by default.
_RETRIABLE_EXCEPTIONS = (urlfetch.DownloadError,
                         urlfetch_errors.InternalTransientError,
                         apiproxy_errors.Error,
                         app_identity.InternalError,
                         app_identity.BackendDeadlineExceeded)

# Per-thread storage for the default RetryParams. The assignment below only
# initializes the attribute on the thread that imports this module; other
# threads are covered by the getattr() default in _get_default_retry_params.
_thread_local_settings = threading.local()
_thread_local_settings.default_retry_params = None


def set_default_retry_params(retry_params):
  """Set a default RetryParams for current thread current request.

  Args:
    retry_params: the RetryParams to use as default. A shallow copy is
      stored, so later mutation of the argument does not affect the default.
  """
  _thread_local_settings.default_retry_params = copy.copy(retry_params)


def _get_default_retry_params():
  """Get default RetryParams for current request and current thread.

  Falls back to a freshly constructed RetryParams when no default was set on
  this thread, or when the stored default was created under a different
  request (see RetryParams.belong_to_current_request).

  Returns:
    A new instance of the default RetryParams.
  """
  default = getattr(_thread_local_settings, 'default_retry_params', None)
  if default is None or not default.belong_to_current_request():
    return RetryParams()
  else:
    return copy.copy(default)


def _quote_filename(filename):
  """Quotes filename to use as a valid URI path.

  Args:
    filename: user provided filename. /bucket/filename.

  Returns:
    The filename properly quoted to use as URI's path component.
  """
  return urllib.quote(filename)


def _unquote_filename(filename):
  """Unquotes a valid URI path back to its filename.

  This is the opposite of _quote_filename.

  Args:
    filename: a quoted filename. /bucket/some%20filename.

  Returns:
    The filename unquoted.
  """
  return urllib.unquote(filename)


def _should_retry(resp):
  """Given a urlfetch response, decide whether to retry that request.

  Args:
    resp: a response object exposing a numeric status_code attribute.

  Returns:
    True when the status is 408 (request timeout) or in the range [500, 600).
  """
  return (resp.status_code == httplib.REQUEST_TIMEOUT or
          (resp.status_code >= 500 and
           resp.status_code < 600))


class _RetryWrapper(object):
  """A wrapper that wraps retry logic around any tasklet."""

  def __init__(self,
               retry_params,
               retriable_exceptions=_RETRIABLE_EXCEPTIONS,
               should_retry=lambda r: False):
    """Init.

    Args:
      retry_params: a RetryParams instance.
      retriable_exceptions: a tuple of exception classes that are retriable.
        It is used directly in an except clause.
      should_retry: a function that takes a result from the tasklet and returns
        a boolean. True if the result should be retried.
    """
    self.retry_params = retry_params
    self.retriable_exceptions = retriable_exceptions
    self.should_retry = should_retry

  @ndb.tasklet
  def run(self, tasklet, **kwds):
    """Run a tasklet with retry.

    The retry should be transparent to the caller: if no results
    are successful, the exception or result from the last retry is returned
    to the caller.

    Args:
      tasklet: the tasklet to run.
      **kwds: keywords arguments to run the tasklet.

    Raises:
      The exception from running the tasklet.

    Returns:
      The result from running the tasklet.
    """
    start_time = time.time()
    n = 1

    while True:
      e = None
      result = None
      got_result = False

      try:
        result = yield tasklet(**kwds)
        got_result = True
        if not self.should_retry(result):
          raise ndb.Return(result)
      except runtime.DeadlineExceededError:
        # The request deadline is never retried; log and propagate.
        logging.debug(
            'Tasklet has exceeded request deadline after %s seconds total',
            time.time() - start_time)
        raise
      except self.retriable_exceptions as err:
        # Keep the exception in a name that stays bound after the handler,
        # since it is logged and possibly re-raised below.
        e = err

      if n == 1:
        logging.debug('Tasklet is %r', tasklet)

      delay = self.retry_params.delay(n, start_time)

      # A non-positive delay means RetryParams decided to give up.
      if delay <= 0:
        logging.debug(
            'Tasklet failed after %s attempts and %s seconds in total',
            n, time.time() - start_time)
        if got_result:
          raise ndb.Return(result)
        elif e is not None:
          raise e
        else:
          # Raised explicitly (rather than via assert) so the check is not
          # stripped when Python runs with -O.
          raise AssertionError('Should never reach here.')

      if got_result:
        logging.debug(
            'Got result %r from tasklet.', result)
      else:
        logging.debug(
            'Got exception "%r" from tasklet.', e)
      logging.debug('Retry in %s seconds.', delay)
      n += 1
      yield tasklets.sleep(delay)


class RetryParams(object):
  """Retry configuration parameters."""

  _DEFAULT_USER_AGENT = 'App Engine Python GCS Client'

  # NOTE(review): presumably restricts callers to keyword arguments after
  # self -- confirm against datastore_rpc._positional.
  @datastore_rpc._positional(1)
  def __init__(self,
               backoff_factor=2.0,
               initial_delay=0.1,
               max_delay=10.0,
               min_retries=3,
               max_retries=6,
               max_retry_period=30.0,
               urlfetch_timeout=None,
               save_access_token=False,
               _user_agent=None):
    """Init.

    This object is unique per request per thread.

    Library will retry according to this setting when App Engine Server
    can't call urlfetch, urlfetch timed out, or urlfetch got a 408 or
    500-600 response.

    Args:
      backoff_factor: exponential backoff multiplier.
      initial_delay: seconds to delay for the first retry.
      max_delay: max seconds to delay for every retry.
      min_retries: min number of times to retry. This value is automatically
        capped by max_retries.
      max_retries: max number of times to retry. Set this to 0 for no retry.
      max_retry_period: max total seconds spent on retry. Retry stops when
        this period passed AND min_retries has been attempted.
      urlfetch_timeout: timeout for urlfetch in seconds. Could be None,
        in which case the value will be chosen by urlfetch module.
      save_access_token: persist access token to datastore to avoid
        excessive usage of GetAccessToken API. Usually the token is cached
        in process and in memcache. In some cases, memcache isn't very
        reliable.
      _user_agent: The user agent string that you want to use in your requests.

    Raises:
      ValueError: when a numeric argument is negative, or zero where zero is
        not allowed.
      TypeError: when an argument has an unexpected type.
    """
    self.backoff_factor = self._check('backoff_factor', backoff_factor)
    self.initial_delay = self._check('initial_delay', initial_delay)
    self.max_delay = self._check('max_delay', max_delay)
    self.max_retry_period = self._check('max_retry_period', max_retry_period)
    self.max_retries = self._check('max_retries', max_retries, True, int)
    self.min_retries = self._check('min_retries', min_retries, True, int)
    if self.min_retries > self.max_retries:
      self.min_retries = self.max_retries

    self.urlfetch_timeout = None
    if urlfetch_timeout is not None:
      self.urlfetch_timeout = self._check('urlfetch_timeout', urlfetch_timeout)
    self.save_access_token = self._check('save_access_token', save_access_token,
                                         True, bool)
    self._user_agent = _user_agent or self._DEFAULT_USER_AGENT

    # Remember which request created this object so that
    # belong_to_current_request() can detect reuse across requests.
    self._request_id = os.getenv('REQUEST_LOG_ID')

  def __eq__(self, other):
    """Two RetryParams are equal when all their attributes are equal."""
    if not isinstance(other, self.__class__):
      return False
    return self.__dict__ == other.__dict__

  def __ne__(self, other):
    """Inverse of __eq__."""
    return not self.__eq__(other)

  @classmethod
  def _check(cls, name, val, can_be_zero=False, val_type=float):
    """Check init arguments.

    Args:
      name: name of the argument. For logging purpose.
      val: value. Value has to be non negative number.
      can_be_zero: whether value can be zero.
      val_type: Python type of the value. When it is float, int values are
        accepted as well.

    Returns:
      The value.

    Raises:
      ValueError: when invalid value is passed in.
      TypeError: when invalid value type is passed in.
    """
    valid_types = [val_type]
    if val_type is float:
      valid_types.append(int)

    # Exact type match on purpose: isinstance() would let bool pass as int.
    if type(val) not in valid_types:
      raise TypeError(
          'Expect type %s for parameter %s' % (val_type.__name__, name))
    if val < 0:
      raise ValueError(
          'Value for parameter %s has to be greater than 0' % name)
    if not can_be_zero and val == 0:
      raise ValueError(
          'Value for parameter %s can not be 0' % name)
    return val

  def belong_to_current_request(self):
    """Return True if this object was created under the current request.

    Compares the REQUEST_LOG_ID environment variable now against the value
    captured in __init__.
    """
    return os.getenv('REQUEST_LOG_ID') == self._request_id

  def delay(self, n, start_time):
    """Calculate delay before the next retry.

    Args:
      n: the number of current attempt. The first attempt should be 1.
      start_time: the time when retry started in unix time.

    Returns:
      Number of seconds to wait before next retry. -1 if retry should give up.
    """
    if (n > self.max_retries or
        (n > self.min_retries and
         time.time() - start_time > self.max_retry_period)):
      return -1
    try:
      backoff = math.pow(self.backoff_factor, n-1) * self.initial_delay
    except OverflowError:
      # The exponential term no longer fits in a float; it would have been
      # capped by max_delay anyway.
      return self.max_delay
    return min(backoff, self.max_delay)


def _run_until_rpc():
  """Eagerly evaluate tasklets until it is blocking on some RPC.

  Usually ndb eventloop el isn't run until some code calls future.get_result().

  When an async tasklet is called, the tasklet wrapper evaluates the tasklet
  code into a generator, enqueues a callback _help_tasklet_along onto
  the el.current queue, and returns a future.

  _help_tasklet_along, when called by the el, will
  get one yielded value from the generator. If the value is another future,
  set up a callback _on_future_complete to invoke _help_tasklet_along
  when the dependent future fulfills. If the value is a RPC, set up a
  callback _on_rpc_complete to invoke _help_tasklet_along when the RPC fulfills.
  Thus _help_tasklet_along drills down
  the chain of futures until some future is blocked by RPC. El runs
  all callbacks and constantly checks pending RPC status.
  """
  el = eventloop.get_event_loop()
  # Drain the queue of immediately runnable callbacks, one at a time.
  while el.current:
    el.run0()


def _eager_tasklet(tasklet):
  """Decorator to turn tasklet to run eagerly.

  Args:
    tasklet: the tasklet to wrap.

  Returns:
    A wrapper that calls the tasklet, runs the event loop via _run_until_rpc,
    and then returns the tasklet's future.
  """

  @utils.wrapping(tasklet)
  def eager_wrapper(*args, **kwds):
    fut = tasklet(*args, **kwds)
    _run_until_rpc()
    return fut

  return eager_wrapper