k8s.io/test-infra@v0.0.0-20240520184403-27c6b4c223d8/gubernator/gcs_async.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import urlparse
import zlib

import google.appengine.ext.ndb as ndb
from google.appengine.api import app_identity
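
# NOTE: this module targets the Python 2.7 App Engine runtime; urlparse,
# xrange, and google.appengine.ext.ndb are Python 2-only APIs.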


GCS_API_URL = 'https://storage.googleapis.com'
STORAGE_API_URL = 'https://www.googleapis.com/storage/v1/b'
MAX_SIZE = 30 * 1024 ** 2  # 30MiB
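# GCS_API_URL serves raw object contents; STORAGE_API_URL is the JSON
# objects-list endpoint used by listdirs() below. MAX_SIZE bounds how many
# bytes get() will inflate from a gzip-encoded response.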

@ndb.tasklet
def get(url):
    context = ndb.get_context()

    headers = {
        'accept-encoding': 'gzip, *',
        'x-goog-api-version': '2',
    }

    url_result = urlparse.urlparse(url)
    if url_result.netloc.endswith('.googleapis.com'):
        # Authenticate as the app's service account so private buckets work.
        auth_token, _ = app_identity.get_access_token(
            'https://www.googleapis.com/auth/cloud-platform')
        if auth_token:
            headers['Authorization'] = 'OAuth %s' % auth_token

    for retry in xrange(6):
        result = yield context.urlfetch(url, headers=headers)
        status = result.status_code
        if status == 429 or 500 <= status < 600:
            # Rate-limited or server error: back off exponentially
            # (1, 2, 4, 8, 16, 32 seconds), then retry.
            yield ndb.sleep(2 ** retry)
            continue
        if status in (200, 206):
            content = result.content
            if result.headers.get('content-encoding') == 'gzip':
                # wbits=15|16 selects gzip framing with the maximum 32KiB
                # window; MAX_SIZE caps the inflated output.
                dec = zlib.decompressobj(15 | 16)
                content = dec.decompress(result.content, MAX_SIZE)
                if dec.unconsumed_tail:
                    logging.warning('only decompressed %d KB, %d KB remain in buffer.',
                                    len(content) / 1024,
                                    len(dec.unconsumed_tail) / 1024)
            raise ndb.Return(content)
        # Any other status is treated as a permanent failure.
        logging.error("unable to fetch '%s': status code %d", url, status)
        raise ndb.Return(None)
    # All six attempts were throttled or 5xx: fail explicitly instead of
    # falling off the end of the tasklet without logging.
    logging.error("unable to fetch '%s': retries exhausted", url)
    raise ndb.Return(None)
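
# Usage sketch (illustrative only; fetch_both is not part of this module):
# tasklets return futures, so a caller can start several fetches and let the
# ndb event loop overlap them.
#
#   @ndb.tasklet
#   def fetch_both(url_a, url_b):
#       a, b = yield get(url_a), get(url_b)  # both fetches run concurrently
#       raise ndb.Return((a, b))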


def read(path):
    """Asynchronously reads a file from GCS.

    NOTE: for large files (>10MB), this may return a truncated response due to
    urlfetch API limits. We don't want to read large files currently, so this
    is not yet a problem.

    Args:
        path: the location of the object to read
    Returns:
        a Future that resolves to the file's data, or None if an error occurred.
    """
    url = GCS_API_URL + path
    return get(url)
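
# Example (hypothetical object path): read() returns a future immediately;
# call get_result() on it to block for the contents.
#
#   data = read('/kubernetes-jenkins/logs/some-job/123/started.json').get_result()
#   if data is not None:
#       started = json.loads(data)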


@ndb.tasklet
def listdirs(path):
    """Asynchronously lists directories present on GCS.

    NOTE: This returns at most 1000 results. The API supports pagination, but
    it's not implemented here.

    Args:
        path: the GCS bucket directory to list subdirectories of
    Returns:
        a Future that resolves to a list of directories, or None if an error
        occurred.
    """
    if path[-1] != '/':
        path += '/'
    assert path[0] != '/'
    bucket, prefix = path.split('/', 1)
    url = '%s/%s/o?delimiter=/&prefix=%s' % (STORAGE_API_URL, bucket, prefix)
    res = yield get(url)
    if res is None:
        raise ndb.Return(None)
    # With delimiter=/, the objects-list API returns each immediate
    # "subdirectory" as an entry in 'prefixes'.
    prefixes = json.loads(res).get('prefixes', [])
    raise ndb.Return(['%s/%s' % (bucket, subdir) for subdir in prefixes])
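
# Example (hypothetical bucket layout): listing the runs of a job.
#
#   dirs = listdirs('kubernetes-jenkins/logs/some-job').get_result()
#   # e.g. ['kubernetes-jenkins/logs/some-job/123/', ...], or None on error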