k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/gubernator/gcs_async.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import urlparse
import zlib

import google.appengine.ext.ndb as ndb
from google.appengine.api import app_identity


GCS_API_URL = 'https://storage.googleapis.com'
STORAGE_API_URL = 'https://www.googleapis.com/storage/v1/b'
MAX_SIZE = 30 * 1024 ** 2  # 30MiB


@ndb.tasklet
def get(url):
    """Asynchronously fetches a URL with urlfetch, decompressing gzip
    responses and authenticating requests to *.googleapis.com with the
    app's service account."""
    context = ndb.get_context()

    headers = {
        'accept-encoding': 'gzip, *',
        'x-goog-api-version': '2',
    }

    url_result = urlparse.urlparse(url)
    if url_result.netloc.endswith('.googleapis.com'):
        auth_token, _ = app_identity.get_access_token(
            'https://www.googleapis.com/auth/cloud-platform')
        if auth_token:
            headers['Authorization'] = 'OAuth %s' % auth_token

    for retry in xrange(6):
        result = yield context.urlfetch(url, headers=headers)
        status = result.status_code
        if status == 429 or 500 <= status < 600:
            # Rate-limited or server error: back off exponentially and retry.
            yield ndb.sleep(2 ** retry)
            continue
        if status in (200, 206):
            content = result.content
            if result.headers.get('content-encoding') == 'gzip':
                # wbits = 16 + 15: expect gzip framing, maximal window size.
                dec = zlib.decompressobj(15 | 16)
                content = dec.decompress(result.content, MAX_SIZE)
                if dec.unconsumed_tail:
                    logging.warning('only decompressed %d KB, %d KB remain in buffer.',
                                    len(content) / 1024,
                                    len(dec.unconsumed_tail) / 1024)
            raise ndb.Return(content)
        logging.error("unable to fetch '%s': status code %d", url, status)
        raise ndb.Return(None)


def read(path):
    """Asynchronously reads a file from GCS.

    NOTE: for large files (>10MB), this may return a truncated response due to
    urlfetch API limits. We don't want to read large files currently, so this
    is not yet a problem.

    Args:
        path: the location of the object to read
    Returns:
        a Future that resolves to the file's data, or None if an error
        occurred.
    """
    url = GCS_API_URL + path
    return get(url)


@ndb.tasklet
def listdirs(path):
    """Asynchronously lists directories present on GCS.

    NOTE: This returns at most 1000 results. The API supports pagination, but
    it's not implemented here.

    Args:
        path: the GCS bucket directory to list subdirectories of
    Returns:
        a Future that resolves to a list of directories, or None if an error
        occurred.
    """
    if path[-1] != '/':
        path += '/'
    assert path[0] != '/'
    bucket, prefix = path.split('/', 1)
    url = '%s/%s/o?delimiter=/&prefix=%s' % (STORAGE_API_URL, bucket, prefix)
    res = yield get(url)
    if res is None:
        raise ndb.Return(None)
    prefixes = json.loads(res).get('prefixes', [])
    raise ndb.Return(['%s/%s' % (bucket, prefix) for prefix in prefixes])
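

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): a hypothetical caller
# showing how the tasklets above compose. The bucket and object names below
# are made up for illustration; only read() and listdirs() come from this
# file. Yielding a tuple of futures inside an ndb tasklet waits on all of
# them concurrently.

@ndb.tasklet
def demo():
    data, dirs = yield (
        read('/my-bucket/logs/1234/build-log.txt'),  # leading '/' required
        listdirs('my-bucket/logs/'),                 # no leading '/'
    )
    if data is not None:
        logging.info('fetched %d bytes', len(data))
    if dirs is not None:
        logging.info('subdirectories: %s', dirs)

# demo().get_result() would drive the tasklet to completion synchronously.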