github.com/shashidharatd/test-infra@v0.0.0-20171006011030-71304e1ca560/gubernator/gcs_async.py

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import zlib

import google.appengine.ext.ndb as ndb


# Object contents are fetched from the direct-download host; bucket listings
# go through the JSON storage API.
GCS_API_URL = 'https://storage.googleapis.com'
STORAGE_API_URL = 'https://www.googleapis.com/storage/v1/b'


@ndb.tasklet
def get(url):
    """Asynchronously fetches a URL, with retries and gzip decoding.

    Retries up to six times with exponential backoff (1, 2, ... 32 seconds)
    on HTTP 429 and 5xx responses; if every attempt fails that way, the
    tasklet falls off the end of the loop and the future resolves to None.
    Any other non-200/206 status is logged and resolves to None immediately.
    """
    context = ndb.get_context()
    headers = {'accept-encoding': 'gzip, *', 'x-goog-api-version': '2'}
    for retry in xrange(6):
        result = yield context.urlfetch(url, headers=headers)
        status = result.status_code
        if status == 429 or 500 <= status < 600:
            yield ndb.sleep(2 ** retry)
            continue
        if status in (200, 206):
            content = result.content
            if result.headers.get('content-encoding') == 'gzip':
                # wbits=15|16 tells zlib to expect a gzip header and trailer.
                content = zlib.decompress(result.content, 15 | 16)
            raise ndb.Return(content)
        logging.error("unable to fetch '%s': status code %d", url, status)
        raise ndb.Return(None)


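# A minimal usage sketch (hypothetical, not part of this module): the future
# returned by get() is consumed with yield inside another tasklet, or with
# .get_result() at the top level of a request handler.
#
#     @ndb.tasklet
#     def get_json(url):
#         content = yield get(url)
#         raise ndb.Return(json.loads(content) if content is not None else None)
#
#     body = get('https://storage.googleapis.com/bucket/obj').get_result()

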
def read(path):
    """Asynchronously reads a file from GCS.

    NOTE: for large files (>10MB), this may return a truncated response due to
    urlfetch API limits. We don't currently need to read large files, so this
    is not yet a problem.

    Args:
        path: the object to read, as '/<bucket>/<object>'
    Returns:
        a Future that resolves to the file's data, or None if an error
        occurred.
    """
    url = GCS_API_URL + path
    return get(url)


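# A minimal usage sketch, with a hypothetical bucket and object path:
#
#     data = read('/kubernetes-jenkins/logs/some-job/42/finished.json').get_result()
#     if data is not None:
#         finished = json.loads(data)

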
@ndb.tasklet
def listdirs(path):
    """Asynchronously lists the subdirectories of a GCS path.

    This queries the JSON storage API with a '/' delimiter, so each common
    prefix under the path is returned as one "directory".

    NOTE: This returns at most 1000 results. The API supports pagination, but
    it's not implemented here.

    Args:
        path: the GCS directory to list subdirectories of, as
            '<bucket>/<prefix>' with no leading slash
    Returns:
        a Future that resolves to a list of directories, or None if an error
        occurred.
    """
    if path[-1] != '/':
        path += '/'
    assert path[0] != '/'
    bucket, prefix = path.split('/', 1)
    url = '%s/%s/o?delimiter=/&prefix=%s' % (STORAGE_API_URL, bucket, prefix)
    res = yield get(url)
    if res is None:
        raise ndb.Return(None)
    prefixes = json.loads(res).get('prefixes', [])
    raise ndb.Return(['%s/%s' % (bucket, subdir) for subdir in prefixes])
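

# A minimal usage sketch, with a hypothetical bucket layout; results are
# bucket-qualified prefixes with trailing slashes:
#
#     dirs = listdirs('kubernetes-jenkins/logs/some-job').get_result()
#     # e.g. ['kubernetes-jenkins/logs/some-job/42/', ...], or None on error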