github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/third_party/cloudstorage/storage_api.py

# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

"""Python wrappers for the Google Storage RESTful API."""


__all__ = ['ReadBuffer',
           'StreamingBuffer',
          ]

import collections
import os
import urlparse

from . import api_utils
from . import common
from . import errors
from . import rest_api

try:
  from google.appengine.api import urlfetch
  from google.appengine.ext import ndb
except ImportError:
  from google.appengine.api import urlfetch
  from google.appengine.ext import ndb


def _get_storage_api(retry_params, account_id=None):
  """Returns storage_api instance for API methods.

  Args:
    retry_params: An instance of api_utils.RetryParams. If none,
      thread's default will be used.
    account_id: Internal-use only.

  Returns:
    A storage_api instance to handle urlfetch work to GCS.
    On dev appserver, this instance by default will talk to a local stub
    unless common.ACCESS_TOKEN is set. That token will be used to talk
    to the real GCS.
  """
  api = _StorageApi(_StorageApi.full_control_scope,
                    service_account_id=account_id,
                    retry_params=retry_params)
  if common.local_run() and not common.get_access_token():
    api.api_url = common.local_api_url()
  if common.get_access_token():
    api.token = common.get_access_token()
  return api
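
# Example (illustrative; the bucket/object path is a placeholder): callers
# typically obtain a handle through this helper and then use the synchronous
# wrappers that rest_api.add_sync_methods generates further below.
#
#   api = _get_storage_api(retry_params=api_utils.RetryParams())
#   status, headers, content = api.head_object('/mybucket/myfile')
#
# On the dev appserver this talks to the local stub unless common.ACCESS_TOKEN
# is set, in which case requests go to real GCS.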


class _StorageApi(rest_api._RestApi):
  """A simple wrapper for the Google Storage RESTful API.

  WARNING: Do NOT directly use this api. It's an implementation detail
  and is subject to change at any release.

  All async methods have similar args and returns.

  Args:
    path: The path to the Google Storage object or bucket, e.g.
      '/mybucket/myfile' or '/mybucket'.
    **kwd: Options for urlfetch. e.g.
      headers={'content-type': 'text/plain'}, payload='blah'.

  Returns:
    A ndb Future. When fulfilled, future.get_result() should return
    a tuple of (status, headers, content) that represents a HTTP response
    of Google Cloud Storage XML API.
  """

  api_url = 'https://storage.googleapis.com'
  read_only_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
  read_write_scope = 'https://www.googleapis.com/auth/devstorage.read_write'
  full_control_scope = 'https://www.googleapis.com/auth/devstorage.full_control'

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    Returns:
      A tuple (of dictionaries) with the state of this object
    """
    return (super(_StorageApi, self).__getstate__(), {'api_url': self.api_url})

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the tuple from a __getstate__ call
    """
    superstate, localstate = state
    super(_StorageApi, self).__setstate__(superstate)
    self.api_url = localstate['api_url']

  @api_utils._eager_tasklet
  @ndb.tasklet
  def do_request_async(self, url, method='GET', headers=None, payload=None,
                       deadline=None, callback=None):
    """Inherit docs.

    This method translates urlfetch exceptions to more service specific ones.
    """
    if headers is None:
      headers = {}
    if 'x-goog-api-version' not in headers:
      headers['x-goog-api-version'] = '2'
    headers['accept-encoding'] = 'gzip, *'
    try:
      resp_tuple = yield super(_StorageApi, self).do_request_async(
          url, method=method, headers=headers, payload=payload,
          deadline=deadline, callback=callback)
    except urlfetch.DownloadError, e:
      raise errors.TimeoutError(
          'Request to Google Cloud Storage timed out.', e)

    raise ndb.Return(resp_tuple)

  def post_object_async(self, path, **kwds):
    """POST to an object."""
    return self.do_request_async(self.api_url + path, 'POST', **kwds)

  def put_object_async(self, path, **kwds):
    """PUT an object."""
    return self.do_request_async(self.api_url + path, 'PUT', **kwds)

  def get_object_async(self, path, **kwds):
    """GET an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'GET', **kwds)

  def delete_object_async(self, path, **kwds):
    """DELETE an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'DELETE', **kwds)

  def head_object_async(self, path, **kwds):
    """HEAD an object.

    Depending on request headers, HEAD returns various object properties,
    e.g. Content-Length, Last-Modified, and ETag.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'HEAD', **kwds)

  def get_bucket_async(self, path, **kwds):
    """GET a bucket."""
    return self.do_request_async(self.api_url + path, 'GET', **kwds)

  def compose_object(self, file_list, destination_file, content_type):
    """COMPOSE multiple objects together.

    Using the given list of files, calls the put object with the compose flag.
    This call merges all the files into the destination file.

    Args:
      file_list: list of dicts with the file name.
      destination_file: Path to the destination file.
      content_type: Content type for the destination file.
    """
    xml_setting_list = ['<ComposeRequest>']

    for meta_data in file_list:
      xml_setting_list.append('<Component>')
      for key, val in meta_data.iteritems():
        xml_setting_list.append('<%s>%s</%s>' % (key, val, key))
      xml_setting_list.append('</Component>')
    xml_setting_list.append('</ComposeRequest>')
    xml = ''.join(xml_setting_list)

    if content_type is not None:
      headers = {'Content-Type': content_type}
    else:
      headers = None
    status, resp_headers, content = self.put_object(
        api_utils._quote_filename(destination_file) + '?compose',
        payload=xml,
        headers=headers)
    errors.check_status(status, [200], destination_file, resp_headers,
                        body=content)


_StorageApi = rest_api.add_sync_methods(_StorageApi)
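
# The synchronous variants used elsewhere in this file (head_object,
# post_object, put_object) are expected to be generated from the *_async
# methods by rest_api.add_sync_methods above. An illustrative compose_object
# call, where each dict maps XML element names (such as Name) to values for
# one source component; object names here are placeholders:
#
#   api.compose_object([{'Name': 'part1'}, {'Name': 'part2'}],
#                      '/mybucket/merged', 'text/plain')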


class ReadBuffer(object):
  """A class for reading Google storage files."""

  DEFAULT_BUFFER_SIZE = 1024 * 1024
  MAX_REQUEST_SIZE = 30 * DEFAULT_BUFFER_SIZE

  def __init__(self,
               api,
               path,
               buffer_size=DEFAULT_BUFFER_SIZE,
               max_request_size=MAX_REQUEST_SIZE,
               offset=0):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      buffer_size: buffer size. The ReadBuffer keeps
        one buffer. But there may be a pending future that contains
        a second buffer. This size must be less than max_request_size.
      max_request_size: Max bytes to request in one urlfetch.
      offset: Number of bytes to skip at the start of the file. If None, 0 is
        used.
    """
    self._api = api
    self._path = path
    self.name = api_utils._unquote_filename(path)
    self.closed = False

    assert buffer_size <= max_request_size
    self._buffer_size = buffer_size
    self._max_request_size = max_request_size
    self._offset = offset

    self._buffer = _Buffer()
    self._etag = None

    get_future = self._get_segment(offset, self._buffer_size, check_response=False)

    status, headers, content = self._api.head_object(path)
    errors.check_status(status, [200], path, resp_headers=headers, body=content)
    self._file_size = long(common.get_stored_content_length(headers))
    self._check_etag(headers.get('etag'))

    self._buffer_future = None

    if self._file_size != 0:
      content, check_response_closure = get_future.get_result()
      check_response_closure()
      self._buffer.reset(content)
      self._request_next_buffer()

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    The contents of the read buffer are not stored, only the current offset for
    data read by the client. A new read buffer is established at unpickling.
    The head information for the object (file size and etag) are stored to
    reduce startup and ensure the file has not changed.

    Returns:
      A dictionary with the state of this object
    """
    return {'api': self._api,
            'path': self._path,
            'buffer_size': self._buffer_size,
            'request_size': self._max_request_size,
            'etag': self._etag,
            'size': self._file_size,
            'offset': self._offset,
            'closed': self.closed}

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the dictionary from a __getstate__ call

    Along with restoring the state, pre-fetch the next read buffer.
    """
    self._api = state['api']
    self._path = state['path']
    self.name = api_utils._unquote_filename(self._path)
    self._buffer_size = state['buffer_size']
    self._max_request_size = state['request_size']
    self._etag = state['etag']
    self._file_size = state['size']
    self._offset = state['offset']
    self._buffer = _Buffer()
    self.closed = state['closed']
    self._buffer_future = None
    if self._remaining() and not self.closed:
      self._request_next_buffer()

  def __iter__(self):
    """Iterator interface.

    Note the ReadBuffer container itself is the iterator. It's
    (quote PEP0234)
    'destructive: they consumes all the values and a second iterator
    cannot easily be created that iterates independently over the same values.
    You could open the file for the second time, or seek() to the beginning.'

    Returns:
      Self.
    """
    return self

  def next(self):
    line = self.readline()
    if not line:
      raise StopIteration()
    return line

  def readline(self, size=-1):
    """Read one line delimited by '\n' from the file.

    A trailing newline character is kept in the string. It may be absent when a
    file ends with an incomplete line. If the size argument is non-negative,
    it specifies the maximum string size (counting the newline) to return.
    A negative size is the same as unspecified. Empty string is returned
    only when EOF is encountered immediately.

    Args:
      size: Maximum number of bytes to read. If not specified, readline stops
        only on '\n' or EOF.

    Returns:
      The data read as a string.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    if size == 0 or not self._remaining():
      return ''

    data_list = []
    newline_offset = self._buffer.find_newline(size)
    while newline_offset < 0:
      data = self._buffer.read(size)
      size -= len(data)
      self._offset += len(data)
      data_list.append(data)
      if size == 0 or not self._remaining():
        return ''.join(data_list)
      self._buffer.reset(self._buffer_future.get_result())
      self._request_next_buffer()
      newline_offset = self._buffer.find_newline(size)

    data = self._buffer.read_to_offset(newline_offset + 1)
    self._offset += len(data)
    data_list.append(data)

    return ''.join(data_list)
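
  # Illustrative only (the path and the process() callable are placeholders):
  # because a ReadBuffer is its own destructive iterator, line-oriented
  # consumption can rely on next()/readline() directly:
  #
  #   for line in ReadBuffer(api, '/mybucket/myfile'):
  #     process(line)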

  def read(self, size=-1):
    """Read data from RAW file.

    Args:
      size: Number of bytes to read as integer. Actual number of bytes
        read is always equal to size unless EOF is reached. If size is
        negative or unspecified, read the entire file.

    Returns:
      data read as str.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    if not self._remaining():
      return ''

    data_list = []
    while True:
      remaining = self._buffer.remaining()
      if size >= 0 and size < remaining:
        data_list.append(self._buffer.read(size))
        self._offset += size
        break
      else:
        size -= remaining
        self._offset += remaining
        data_list.append(self._buffer.read())

        if self._buffer_future is None:
          if size < 0 or size >= self._remaining():
            needs = self._remaining()
          else:
            needs = size
          data_list.extend(self._get_segments(self._offset, needs))
          self._offset += needs
          break

        if self._buffer_future:
          self._buffer.reset(self._buffer_future.get_result())
          self._buffer_future = None

    if self._buffer_future is None:
      self._request_next_buffer()
    return ''.join(data_list)

  def _remaining(self):
    return self._file_size - self._offset

  def _request_next_buffer(self):
    """Request next buffer.

    Requires self._offset and self._buffer are in consistent state.
    """
    self._buffer_future = None
    next_offset = self._offset + self._buffer.remaining()
    if next_offset != self._file_size:
      self._buffer_future = self._get_segment(next_offset,
                                              self._buffer_size)

  def _get_segments(self, start, request_size):
    """Get segments of the file from Google Storage as a list.

    A large request is broken into segments to avoid hitting urlfetch
    response size limit. Each segment is returned from a separate urlfetch.

    Args:
      start: start offset to request. Inclusive. Have to be within the
        range of the file.
      request_size: number of bytes to request.

    Returns:
      A list of file segments in order
    """
    if not request_size:
      return []

    end = start + request_size
    futures = []

    while request_size > self._max_request_size:
      futures.append(self._get_segment(start, self._max_request_size))
      request_size -= self._max_request_size
      start += self._max_request_size
    if start < end:
      futures.append(self._get_segment(start, end - start))
    return [fut.get_result() for fut in futures]
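
  # Worked example of the splitting above, assuming the default sizes: with
  # _max_request_size of 30MB, a 70MB read is issued as three ranged GETs of
  # 30MB, 30MB and 10MB. Each segment request carries a header such as
  # 'Range: bytes=0-31457279' and is status/etag checked before its future
  # resolves (see _get_segment below).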

  @ndb.tasklet
  def _get_segment(self, start, request_size, check_response=True):
    """Get a segment of the file from Google Storage.

    Args:
      start: start offset of the segment. Inclusive. Have to be within the
        range of the file.
      request_size: number of bytes to request. Have to be small enough
        for a single urlfetch request. May go over the logical range of the
        file.
      check_response: True to check the validity of GCS response automatically
        before the future returns. False otherwise. See Yields section.

    Yields:
      If check_response is True, the segment [start, start + request_size)
      of the file.
      Otherwise, a tuple. The first element is the unverified file segment.
      The second element is a closure that checks response. Caller should
      first invoke the closure before consuming the file segment.

    Raises:
      ValueError: if the file has changed while reading.
    """
    end = start + request_size - 1
    content_range = '%d-%d' % (start, end)
    headers = {'Range': 'bytes=' + content_range}
    status, resp_headers, content = yield self._api.get_object_async(
        self._path, headers=headers)
    def _checker():
      errors.check_status(status, [200, 206], self._path, headers,
                          resp_headers, body=content)
      self._check_etag(resp_headers.get('etag'))
    if check_response:
      _checker()
      raise ndb.Return(content)
    raise ndb.Return(content, _checker)

  def _check_etag(self, etag):
    """Check if etag is the same across requests to GCS.

    If self._etag is None, set it. If etag is set, check that the new
    etag equals the old one.

    In the __init__ method, we fire one HEAD and one GET request using
    ndb tasklet. One of them would return first and set the first value.

    Args:
      etag: etag from a GCS HTTP response. None if etag is not part of the
        response header. It could be None for example in the case of GCS
        composite file.

    Raises:
      ValueError: if two etags are not equal.
    """
    if etag is None:
      return
    elif self._etag is None:
      self._etag = etag
    elif self._etag != etag:
      raise ValueError('File on GCS has changed while reading.')

  def close(self):
    self.closed = True
    self._buffer = None
    self._buffer_future = None

  def __enter__(self):
    return self

  def __exit__(self, atype, value, traceback):
    self.close()
    return False

  def seek(self, offset, whence=os.SEEK_SET):
    """Set the file's current offset.

    Note if the new offset is out of bound, it is adjusted to either 0 or EOF.

    Args:
      offset: seek offset as number.
      whence: seek mode. Supported modes are os.SEEK_SET (absolute seek),
        os.SEEK_CUR (seek relative to the current position), and os.SEEK_END
        (seek relative to the end, offset should be negative).

    Raises:
      IOError: When this buffer is closed.
      ValueError: When whence is invalid.
    """
    self._check_open()

    self._buffer.reset()
    self._buffer_future = None

    if whence == os.SEEK_SET:
      self._offset = offset
    elif whence == os.SEEK_CUR:
      self._offset += offset
    elif whence == os.SEEK_END:
      self._offset = self._file_size + offset
    else:
      raise ValueError('Whence mode %s is invalid.' % str(whence))

    self._offset = min(self._offset, self._file_size)
    self._offset = max(self._offset, 0)
    if self._remaining():
      self._request_next_buffer()

  def tell(self):
    """Tell the file's current offset.

    Returns:
      current offset in reading this file.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    return self._offset

  def _check_open(self):
    if self.closed:
      raise IOError('Buffer is closed.')

  def seekable(self):
    return True

  def readable(self):
    return True

  def writable(self):
    return False


class _Buffer(object):
  """In memory buffer."""

  def __init__(self):
    self.reset()

  def reset(self, content='', offset=0):
    self._buffer = content
    self._offset = offset

  def read(self, size=-1):
    """Returns bytes from self._buffer and updates related offsets.

    Args:
      size: number of bytes to read starting from current offset.
        Read the entire buffer if negative.

    Returns:
      Requested bytes from buffer.
    """
    if size < 0:
      offset = len(self._buffer)
    else:
      offset = self._offset + size
    return self.read_to_offset(offset)

  def read_to_offset(self, offset):
    """Returns bytes from self._buffer and updates related offsets.

    Args:
      offset: read from current offset to this offset, exclusive.

    Returns:
      Requested bytes from buffer.
    """
    assert offset >= self._offset
    result = self._buffer[self._offset: offset]
    self._offset += len(result)
    return result

  def remaining(self):
    return len(self._buffer) - self._offset

  def find_newline(self, size=-1):
    """Search for newline char in buffer starting from current offset.

    Args:
      size: number of bytes to search. -1 means all.

    Returns:
      offset of newline char in buffer. -1 if it doesn't exist.
    """
    if size < 0:
      return self._buffer.find('\n', self._offset)
    return self._buffer.find('\n', self._offset, self._offset + size)


class StreamingBuffer(object):
  """A class for creating large objects using the 'resumable' API.

  The API is a subset of the Python writable stream API sufficient to
  support writing zip files using the zipfile module.

  The exact sequence of calls and use of headers is documented at
  https://developers.google.com/storage/docs/developer-guide#unknownresumables
  """

  _blocksize = 256 * 1024

  _flushsize = 8 * _blocksize

  _maxrequestsize = 9 * 4 * _blocksize

  def __init__(self,
               api,
               path,
               content_type=None,
               gcs_headers=None):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      content_type: Optional content-type; if not set, the content type is
        delegated to Google Cloud Storage.
      gcs_headers: additional gs headers as a str->str dict, e.g.
        {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.

    Raises:
      IOError: When this location cannot be found.
    """
    assert self._maxrequestsize > self._blocksize
    assert self._maxrequestsize % self._blocksize == 0
    assert self._maxrequestsize >= self._flushsize

    self._api = api
    self._path = path

    self.name = api_utils._unquote_filename(path)
    self.closed = False

    self._buffer = collections.deque()
    self._buffered = 0
    self._written = 0
    self._offset = 0

    headers = {'x-goog-resumable': 'start'}
    if content_type:
      headers['content-type'] = content_type
    if gcs_headers:
      headers.update(gcs_headers)
    status, resp_headers, content = self._api.post_object(path, headers=headers)
    errors.check_status(status, [201], path, headers, resp_headers,
                        body=content)
    loc = resp_headers.get('location')
    if not loc:
      raise IOError('No location header found in 201 response')
    parsed = urlparse.urlparse(loc)
    self._path_with_token = '%s?%s' % (self._path, parsed.query)
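
  # Outline of the resumable-upload exchange this class drives (offsets are
  # illustrative; see the class docstring for the authoritative description):
  #
  #   POST /bucket/obj with 'x-goog-resumable: start'         -> 201 + Location
  #   PUT  /bucket/obj?upload_id=... 'bytes 0-262143/*'       -> 308
  #   PUT  /bucket/obj?upload_id=... 'bytes 262144-524287/N'  -> 200
  #
  # Intermediate PUTs must cover whole 256KB blocks and report the total size
  # as '*'; the final PUT supplies the real length N and finalizes the object.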

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    The contents of the write buffer are stored. Writes to the underlying
    storage are required to be on block boundaries (_blocksize) except for the
    last write. In the worst case the pickled version of this object may be
    slightly larger than the blocksize.

    Returns:
      A dictionary with the state of this object
    """
    return {'api': self._api,
            'path': self._path,
            'path_token': self._path_with_token,
            'buffer': self._buffer,
            'buffered': self._buffered,
            'written': self._written,
            'offset': self._offset,
            'closed': self.closed}

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the dictionary from a __getstate__ call
    """
    self._api = state['api']
    self._path_with_token = state['path_token']
    self._buffer = state['buffer']
    self._buffered = state['buffered']
    self._written = state['written']
    self._offset = state['offset']
    self.closed = state['closed']
    self._path = state['path']
    self.name = api_utils._unquote_filename(self._path)

  def write(self, data):
    """Write some bytes.

    Args:
      data: data to write. str.

    Raises:
      TypeError: if data is not of type str.
    """
    self._check_open()
    if not isinstance(data, str):
      raise TypeError('Expected str but got %s.' % type(data))
    if not data:
      return
    self._buffer.append(data)
    self._buffered += len(data)
    self._offset += len(data)
    if self._buffered >= self._flushsize:
      self._flush()

  def flush(self):
    """Flush as much as possible to GCS.

    GCS *requires* that all writes except for the final one align on
    256KB boundaries. So the internal buffer may still have < 256KB bytes left
    after flush.
    """
    self._check_open()
    self._flush(finish=False)
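
  # Worked example of the alignment rule, for illustration: with _blocksize =
  # 256 * 1024 = 262144 bytes, calling flush() with 600000 bytes buffered
  # sends a single PUT of 524288 bytes (two whole blocks, 'content-range:
  # bytes 0-524287/*') and keeps the remaining 75712 bytes buffered until the
  # next flush() or close().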

  def tell(self):
    """Return the total number of bytes passed to write() so far.

    (There is no seek() method.)
    """
    return self._offset

  def close(self):
    """Flush the buffer and finalize the file.

    When this returns the new file is available for reading.
    """
    if not self.closed:
      self.closed = True
      self._flush(finish=True)
      self._buffer = None

  def __enter__(self):
    return self

  def __exit__(self, atype, value, traceback):
    self.close()
    return False

  def _flush(self, finish=False):
    """Internal API to flush.

    Buffer is flushed to GCS only when the total amount of buffered data is at
    least self._blocksize, or to flush the final (incomplete) block of
    the file with finish=True.
    """
    while ((finish and self._buffered >= 0) or
           (not finish and self._buffered >= self._blocksize)):
      tmp_buffer = []
      tmp_buffer_len = 0

      excess = 0
      while self._buffer:
        buf = self._buffer.popleft()
        size = len(buf)
        self._buffered -= size
        tmp_buffer.append(buf)
        tmp_buffer_len += size
        if tmp_buffer_len >= self._maxrequestsize:
          excess = tmp_buffer_len - self._maxrequestsize
          break
        if not finish and (
            tmp_buffer_len % self._blocksize + self._buffered <
            self._blocksize):
          excess = tmp_buffer_len % self._blocksize
          break

      if excess:
        over = tmp_buffer.pop()
        size = len(over)
        assert size >= excess
        tmp_buffer_len -= size
        head, tail = over[:-excess], over[-excess:]
        self._buffer.appendleft(tail)
        self._buffered += len(tail)
        if head:
          tmp_buffer.append(head)
          tmp_buffer_len += len(head)

      data = ''.join(tmp_buffer)
      file_len = '*'
      if finish and not self._buffered:
        file_len = self._written + len(data)
      self._send_data(data, self._written, file_len)
      self._written += len(data)
      if file_len != '*':
        break

  def _send_data(self, data, start_offset, file_len):
    """Send the block to the storage service.

    This is a utility method that does not modify self.

    Args:
      data: data to send in str.
      start_offset: start offset of the data in relation to the file.
      file_len: an int if this is the last data to append to the file.
        Otherwise '*'.
    """
    headers = {}
    end_offset = start_offset + len(data) - 1

    if data:
      headers['content-range'] = ('bytes %d-%d/%s' %
                                  (start_offset, end_offset, file_len))
    else:
      headers['content-range'] = ('bytes */%s' % file_len)

    status, response_headers, content = self._api.put_object(
        self._path_with_token, payload=data, headers=headers)
    if file_len == '*':
      expected = 308
    else:
      expected = 200
    errors.check_status(status, [expected], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})

  def _get_offset_from_gcs(self):
    """Get the last offset that has been written to GCS.

    This is a utility method that does not modify self.

    Returns:
      an int of the last offset written to GCS by this upload, inclusive.
      -1 means nothing has been written.
    """
    headers = {'content-range': 'bytes */*'}
    status, response_headers, content = self._api.put_object(
        self._path_with_token, headers=headers)
    errors.check_status(status, [308], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})
    val = response_headers.get('range')
    if val is None:
      return -1
    _, offset = val.rsplit('-', 1)
    return int(offset)

  def _force_close(self, file_length=None):
    """Close this buffer on file_length.

    Finalize this upload immediately on file_length.
    Contents that are still in memory will not be uploaded.

    This is a utility method that does not modify self.

    Args:
      file_length: file length. Must match what has been uploaded. If None,
        it will be queried from GCS.
    """
    if file_length is None:
      file_length = self._get_offset_from_gcs() + 1
    self._send_data('', 0, file_length)

  def _check_open(self):
    if self.closed:
      raise IOError('Buffer is closed.')

  def seekable(self):
    return False

  def readable(self):
    return False

  def writable(self):
    return True
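
# Illustrative round trip using the two buffers directly ('/mybucket/myfile'
# is a placeholder; real callers normally go through the package-level file
# open helper instead):
#
#   api = _get_storage_api(retry_params=None)
#   with StreamingBuffer(api, '/mybucket/myfile', content_type='text/plain') as f:
#     f.write('hello world\n')
#   with ReadBuffer(api, '/mybucket/myfile') as f:
#     print f.read()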