github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/gubernator/third_party/cloudstorage/storage_api.py

# Copyright 2012 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific
# language governing permissions and limitations under the License.

"""Python wrappers for the Google Storage RESTful API."""


__all__ = ['ReadBuffer',
           'StreamingBuffer',
          ]

import collections
import os
import urlparse

from . import api_utils
from . import common
from . import errors
from . import rest_api

try:
  from google.appengine.api import urlfetch
  from google.appengine.ext import ndb
except ImportError:
  from google.appengine.api import urlfetch
  from google.appengine.ext import ndb


def _get_storage_api(retry_params, account_id=None):
  """Returns storage_api instance for API methods.

  Args:
    retry_params: An instance of api_utils.RetryParams. If none,
      thread's default will be used.
    account_id: Internal-use only.

  Returns:
    A storage_api instance to handle urlfetch work to GCS.
    On dev appserver, this instance by default will talk to a local stub
    unless common.ACCESS_TOKEN is set. That token will be used to talk
    to the real GCS.
  """
  api = _StorageApi(_StorageApi.full_control_scope,
                    service_account_id=account_id,
                    retry_params=retry_params)
  if common.local_run() and not common.get_access_token():
    api.api_url = common.local_api_url()
  if common.get_access_token():
    api.token = common.get_access_token()
  return api
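
# Example (illustrative; the bucket/object path is a placeholder): callers
# typically obtain a handle through this helper and then use the synchronous
# wrappers that rest_api.add_sync_methods generates further below.
#
#   api = _get_storage_api(retry_params=api_utils.RetryParams())
#   status, headers, content = api.head_object('/mybucket/myfile')
#
# On the dev appserver this talks to the local stub unless common.ACCESS_TOKEN
# is set, in which case requests go to real GCS.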


class _StorageApi(rest_api._RestApi):
  """A simple wrapper for the Google Storage RESTful API.

  WARNING: Do NOT directly use this api. It's an implementation detail
  and is subject to change at any release.

  All async methods have similar args and returns.

  Args:
    path: The path to the Google Storage object or bucket, e.g.
      '/mybucket/myfile' or '/mybucket'.
    **kwd: Options for urlfetch. e.g.
      headers={'content-type': 'text/plain'}, payload='blah'.

  Returns:
    A ndb Future. When fulfilled, future.get_result() should return
    a tuple of (status, headers, content) that represents a HTTP response
    of Google Cloud Storage XML API.
  """

  api_url = 'https://storage.googleapis.com'
  read_only_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
  read_write_scope = 'https://www.googleapis.com/auth/devstorage.read_write'
  full_control_scope = 'https://www.googleapis.com/auth/devstorage.full_control'

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    Returns:
      A tuple (of dictionaries) with the state of this object
    """
    return (super(_StorageApi, self).__getstate__(), {'api_url': self.api_url})

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the tuple from a __getstate__ call
    """
    superstate, localstate = state
    super(_StorageApi, self).__setstate__(superstate)
    self.api_url = localstate['api_url']

  @api_utils._eager_tasklet
  @ndb.tasklet
  def do_request_async(self, url, method='GET', headers=None, payload=None,
                       deadline=None, callback=None):
    """Inherit docs.

    This method translates urlfetch exceptions to more service specific ones.
    """
    if headers is None:
      headers = {}
    if 'x-goog-api-version' not in headers:
      headers['x-goog-api-version'] = '2'
    headers['accept-encoding'] = 'gzip, *'
    try:
      resp_tuple = yield super(_StorageApi, self).do_request_async(
          url, method=method, headers=headers, payload=payload,
          deadline=deadline, callback=callback)
    except urlfetch.DownloadError, e:
      raise errors.TimeoutError(
          'Request to Google Cloud Storage timed out.', e)

    raise ndb.Return(resp_tuple)

  def post_object_async(self, path, **kwds):
    """POST to an object."""
    return self.do_request_async(self.api_url + path, 'POST', **kwds)

  def put_object_async(self, path, **kwds):
    """PUT an object."""
    return self.do_request_async(self.api_url + path, 'PUT', **kwds)

  def get_object_async(self, path, **kwds):
    """GET an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'GET', **kwds)

  def delete_object_async(self, path, **kwds):
    """DELETE an object.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'DELETE', **kwds)

  def head_object_async(self, path, **kwds):
    """HEAD an object.

    Depending on request headers, HEAD returns various object properties,
    e.g. Content-Length, Last-Modified, and ETag.

    Note: No payload argument is supported.
    """
    return self.do_request_async(self.api_url + path, 'HEAD', **kwds)

  def get_bucket_async(self, path, **kwds):
    """GET a bucket."""
    return self.do_request_async(self.api_url + path, 'GET', **kwds)

  def compose_object(self, file_list, destination_file, content_type):
    """COMPOSE multiple objects together.

    Using the given list of files, calls the put object with the compose flag.
    This call merges all the files into the destination file.

    Args:
      file_list: list of dicts with the file name.
      destination_file: Path to the destination file.
      content_type: Content type for the destination file.
    """
    xml_setting_list = ['<ComposeRequest>']

    for meta_data in file_list:
      xml_setting_list.append('<Component>')
      for key, val in meta_data.iteritems():
        xml_setting_list.append('<%s>%s</%s>' % (key, val, key))
      xml_setting_list.append('</Component>')
    xml_setting_list.append('</ComposeRequest>')
    xml = ''.join(xml_setting_list)

    if content_type is not None:
      headers = {'Content-Type': content_type}
    else:
      headers = None
    status, resp_headers, content = self.put_object(
        api_utils._quote_filename(destination_file) + '?compose',
        payload=xml,
        headers=headers)
    errors.check_status(status, [200], destination_file, resp_headers,
                        body=content)


_StorageApi = rest_api.add_sync_methods(_StorageApi)
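
# The synchronous variants used elsewhere in this file (head_object,
# post_object, put_object) are expected to be generated from the *_async
# methods by rest_api.add_sync_methods above. An illustrative compose_object
# call, where each dict maps XML element names (such as Name) to values for
# one source component; object names here are placeholders:
#
#   api.compose_object([{'Name': 'part1'}, {'Name': 'part2'}],
#                      '/mybucket/merged', 'text/plain')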


class ReadBuffer(object):
  """A class for reading Google storage files."""

  DEFAULT_BUFFER_SIZE = 1024 * 1024
  MAX_REQUEST_SIZE = 30 * DEFAULT_BUFFER_SIZE

  def __init__(self,
               api,
               path,
               buffer_size=DEFAULT_BUFFER_SIZE,
               max_request_size=MAX_REQUEST_SIZE,
               offset=0):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      buffer_size: buffer size. The ReadBuffer keeps
        one buffer. But there may be a pending future that contains
        a second buffer. This size must be less than max_request_size.
      max_request_size: Max bytes to request in one urlfetch.
      offset: Number of bytes to skip at the start of the file. If None, 0 is
        used.
    """
    self._api = api
    self._path = path
    self.name = api_utils._unquote_filename(path)
    self.closed = False

    assert buffer_size <= max_request_size
    self._buffer_size = buffer_size
    self._max_request_size = max_request_size
    self._offset = offset

    self._buffer = _Buffer()
    self._etag = None

    get_future = self._get_segment(offset, self._buffer_size, check_response=False)

    status, headers, content = self._api.head_object(path)
    errors.check_status(status, [200], path, resp_headers=headers, body=content)
    self._file_size = long(common.get_stored_content_length(headers))
    self._check_etag(headers.get('etag'))

    self._buffer_future = None

    if self._file_size != 0:
      content, check_response_closure = get_future.get_result()
      check_response_closure()
      self._buffer.reset(content)
      self._request_next_buffer()

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    The contents of the read buffer are not stored, only the current offset for
    data read by the client. A new read buffer is established at unpickling.
    The head information for the object (file size and etag) are stored to
    reduce startup and ensure the file has not changed.

    Returns:
      A dictionary with the state of this object
    """
    return {'api': self._api,
            'path': self._path,
            'buffer_size': self._buffer_size,
            'request_size': self._max_request_size,
            'etag': self._etag,
            'size': self._file_size,
            'offset': self._offset,
            'closed': self.closed}

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the dictionary from a __getstate__ call

    Along with restoring the state, pre-fetch the next read buffer.
    """
    self._api = state['api']
    self._path = state['path']
    self.name = api_utils._unquote_filename(self._path)
    self._buffer_size = state['buffer_size']
    self._max_request_size = state['request_size']
    self._etag = state['etag']
    self._file_size = state['size']
    self._offset = state['offset']
    self._buffer = _Buffer()
    self.closed = state['closed']
    self._buffer_future = None
    if self._remaining() and not self.closed:
      self._request_next_buffer()

  def __iter__(self):
    """Iterator interface.

    Note the ReadBuffer container itself is the iterator. It's
    (quote PEP0234)
    'destructive: they consumes all the values and a second iterator
    cannot easily be created that iterates independently over the same values.
    You could open the file for the second time, or seek() to the beginning.'

    Returns:
      Self.
    """
    return self

  def next(self):
    line = self.readline()
    if not line:
      raise StopIteration()
    return line

  def readline(self, size=-1):
    """Read one line delimited by '\n' from the file.

    A trailing newline character is kept in the string. It may be absent when a
    file ends with an incomplete line. If the size argument is non-negative,
    it specifies the maximum string size (counting the newline) to return.
    A negative size is the same as unspecified. Empty string is returned
    only when EOF is encountered immediately.

    Args:
      size: Maximum number of bytes to read. If not specified, readline stops
        only on '\n' or EOF.

    Returns:
      The data read as a string.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    if size == 0 or not self._remaining():
      return ''

    data_list = []
    newline_offset = self._buffer.find_newline(size)
    while newline_offset < 0:
      data = self._buffer.read(size)
      size -= len(data)
      self._offset += len(data)
      data_list.append(data)
      if size == 0 or not self._remaining():
        return ''.join(data_list)
      self._buffer.reset(self._buffer_future.get_result())
      self._request_next_buffer()
      newline_offset = self._buffer.find_newline(size)

    data = self._buffer.read_to_offset(newline_offset + 1)
    self._offset += len(data)
    data_list.append(data)

    return ''.join(data_list)
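
  # Illustrative only (the path and the process() callable are placeholders):
  # because a ReadBuffer is its own destructive iterator, line-oriented
  # consumption can rely on next()/readline() directly:
  #
  #   for line in ReadBuffer(api, '/mybucket/myfile'):
  #     process(line)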

  def read(self, size=-1):
    """Read data from RAW file.

    Args:
      size: Number of bytes to read as integer. Actual number of bytes
        read is always equal to size unless EOF is reached. If size is
        negative or unspecified, read the entire file.

    Returns:
      data read as str.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    if not self._remaining():
      return ''

    data_list = []
    while True:
      remaining = self._buffer.remaining()
      if size >= 0 and size < remaining:
        data_list.append(self._buffer.read(size))
        self._offset += size
        break
      else:
        size -= remaining
        self._offset += remaining
        data_list.append(self._buffer.read())

        if self._buffer_future is None:
          if size < 0 or size >= self._remaining():
            needs = self._remaining()
          else:
            needs = size
          data_list.extend(self._get_segments(self._offset, needs))
          self._offset += needs
          break

        if self._buffer_future:
          self._buffer.reset(self._buffer_future.get_result())
          self._buffer_future = None

    if self._buffer_future is None:
      self._request_next_buffer()
    return ''.join(data_list)

  def _remaining(self):
    return self._file_size - self._offset

  def _request_next_buffer(self):
    """Request next buffer.

    Requires self._offset and self._buffer are in consistent state.
    """
    self._buffer_future = None
    next_offset = self._offset + self._buffer.remaining()
    if next_offset != self._file_size:
      self._buffer_future = self._get_segment(next_offset,
                                              self._buffer_size)

  def _get_segments(self, start, request_size):
    """Get segments of the file from Google Storage as a list.

    A large request is broken into segments to avoid hitting urlfetch
    response size limit. Each segment is returned from a separate urlfetch.

    Args:
      start: start offset to request. Inclusive. Have to be within the
        range of the file.
      request_size: number of bytes to request.

    Returns:
      A list of file segments in order
    """
    if not request_size:
      return []

    end = start + request_size
    futures = []

    while request_size > self._max_request_size:
      futures.append(self._get_segment(start, self._max_request_size))
      request_size -= self._max_request_size
      start += self._max_request_size
    if start < end:
      futures.append(self._get_segment(start, end - start))
    return [fut.get_result() for fut in futures]
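
  # Worked example of the splitting above, assuming the default sizes: with
  # _max_request_size of 30MB, a 70MB read is issued as three ranged GETs of
  # 30MB, 30MB and 10MB. Each segment request carries a header such as
  # 'Range: bytes=0-31457279' and is status/etag checked before its future
  # resolves (see _get_segment below).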

  @ndb.tasklet
  def _get_segment(self, start, request_size, check_response=True):
    """Get a segment of the file from Google Storage.

    Args:
      start: start offset of the segment. Inclusive. Have to be within the
        range of the file.
      request_size: number of bytes to request. Have to be small enough
        for a single urlfetch request. May go over the logical range of the
        file.
      check_response: True to check the validity of GCS response automatically
        before the future returns. False otherwise. See Yields section.

    Yields:
      If check_response is True, the segment [start, start + request_size)
      of the file.
      Otherwise, a tuple. The first element is the unverified file segment.
      The second element is a closure that checks response. Caller should
      first invoke the closure before consuming the file segment.

    Raises:
      ValueError: if the file has changed while reading.
    """
    end = start + request_size - 1
    content_range = '%d-%d' % (start, end)
    headers = {'Range': 'bytes=' + content_range}
    status, resp_headers, content = yield self._api.get_object_async(
        self._path, headers=headers)
    def _checker():
      errors.check_status(status, [200, 206], self._path, headers,
                          resp_headers, body=content)
      self._check_etag(resp_headers.get('etag'))
    if check_response:
      _checker()
      raise ndb.Return(content)
    raise ndb.Return(content, _checker)

  def _check_etag(self, etag):
    """Check if etag is the same across requests to GCS.

    If self._etag is None, set it. If etag is set, check that the new
    etag equals the old one.

    In the __init__ method, we fire one HEAD and one GET request using
    ndb tasklet. One of them would return first and set the first value.

    Args:
      etag: etag from a GCS HTTP response. None if etag is not part of the
        response header. It could be None for example in the case of GCS
        composite file.

    Raises:
      ValueError: if two etags are not equal.
    """
    if etag is None:
      return
    elif self._etag is None:
      self._etag = etag
    elif self._etag != etag:
      raise ValueError('File on GCS has changed while reading.')

  def close(self):
    self.closed = True
    self._buffer = None
    self._buffer_future = None

  def __enter__(self):
    return self

  def __exit__(self, atype, value, traceback):
    self.close()
    return False

  def seek(self, offset, whence=os.SEEK_SET):
    """Set the file's current offset.

    Note if the new offset is out of bound, it is adjusted to either 0 or EOF.

    Args:
      offset: seek offset as number.
      whence: seek mode. Supported modes are os.SEEK_SET (absolute seek),
        os.SEEK_CUR (seek relative to the current position), and os.SEEK_END
        (seek relative to the end, offset should be negative).

    Raises:
      IOError: When this buffer is closed.
      ValueError: When whence is invalid.
    """
    self._check_open()

    self._buffer.reset()
    self._buffer_future = None

    if whence == os.SEEK_SET:
      self._offset = offset
    elif whence == os.SEEK_CUR:
      self._offset += offset
    elif whence == os.SEEK_END:
      self._offset = self._file_size + offset
    else:
      raise ValueError('Whence mode %s is invalid.' % str(whence))

    self._offset = min(self._offset, self._file_size)
    self._offset = max(self._offset, 0)
    if self._remaining():
      self._request_next_buffer()

  def tell(self):
    """Tell the file's current offset.

    Returns:
      current offset in reading this file.

    Raises:
      IOError: When this buffer is closed.
    """
    self._check_open()
    return self._offset

  def _check_open(self):
    if self.closed:
      raise IOError('Buffer is closed.')

  def seekable(self):
    return True

  def readable(self):
    return True

  def writable(self):
    return False


class _Buffer(object):
  """In memory buffer."""

  def __init__(self):
    self.reset()

  def reset(self, content='', offset=0):
    self._buffer = content
    self._offset = offset

  def read(self, size=-1):
    """Returns bytes from self._buffer and updates related offsets.

    Args:
      size: number of bytes to read starting from current offset.
        Read the entire buffer if negative.

    Returns:
      Requested bytes from buffer.
    """
    if size < 0:
      offset = len(self._buffer)
    else:
      offset = self._offset + size
    return self.read_to_offset(offset)

  def read_to_offset(self, offset):
    """Returns bytes from self._buffer and updates related offsets.

    Args:
      offset: read from current offset to this offset, exclusive.

    Returns:
      Requested bytes from buffer.
    """
    assert offset >= self._offset
    result = self._buffer[self._offset: offset]
    self._offset += len(result)
    return result

  def remaining(self):
    return len(self._buffer) - self._offset

  def find_newline(self, size=-1):
    """Search for newline char in buffer starting from current offset.

    Args:
      size: number of bytes to search. -1 means all.

    Returns:
      offset of newline char in buffer. -1 if it doesn't exist.
    """
    if size < 0:
      return self._buffer.find('\n', self._offset)
    return self._buffer.find('\n', self._offset, self._offset + size)


class StreamingBuffer(object):
  """A class for creating large objects using the 'resumable' API.

  The API is a subset of the Python writable stream API sufficient to
  support writing zip files using the zipfile module.

  The exact sequence of calls and use of headers is documented at
  https://developers.google.com/storage/docs/developer-guide#unknownresumables
  """

  _blocksize = 256 * 1024

  _flushsize = 8 * _blocksize

  _maxrequestsize = 9 * 4 * _blocksize

  def __init__(self,
               api,
               path,
               content_type=None,
               gcs_headers=None):
    """Constructor.

    Args:
      api: A StorageApi instance.
      path: Quoted/escaped path to the object, e.g. /mybucket/myfile
      content_type: Optional content-type; if not set, the content type is
        delegated to Google Cloud Storage.
      gcs_headers: additional gs headers as a str->str dict, e.g.
        {'x-goog-acl': 'private', 'x-goog-meta-foo': 'foo'}.

    Raises:
      IOError: When this location cannot be found.
    """
    assert self._maxrequestsize > self._blocksize
    assert self._maxrequestsize % self._blocksize == 0
    assert self._maxrequestsize >= self._flushsize

    self._api = api
    self._path = path

    self.name = api_utils._unquote_filename(path)
    self.closed = False

    self._buffer = collections.deque()
    self._buffered = 0
    self._written = 0
    self._offset = 0

    headers = {'x-goog-resumable': 'start'}
    if content_type:
      headers['content-type'] = content_type
    if gcs_headers:
      headers.update(gcs_headers)
    status, resp_headers, content = self._api.post_object(path, headers=headers)
    errors.check_status(status, [201], path, headers, resp_headers,
                        body=content)
    loc = resp_headers.get('location')
    if not loc:
      raise IOError('No location header found in 201 response')
    parsed = urlparse.urlparse(loc)
    self._path_with_token = '%s?%s' % (self._path, parsed.query)
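
  # Outline of the resumable-upload exchange this class drives (offsets are
  # illustrative; see the class docstring for the authoritative description):
  #
  #   POST /bucket/obj with 'x-goog-resumable: start'         -> 201 + Location
  #   PUT  /bucket/obj?upload_id=... 'bytes 0-262143/*'       -> 308
  #   PUT  /bucket/obj?upload_id=... 'bytes 262144-524287/N'  -> 200
  #
  # Intermediate PUTs must cover whole 256KB blocks and report the total size
  # as '*'; the final PUT supplies the real length N and finalizes the object.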

  def __getstate__(self):
    """Store state as part of serialization/pickling.

    The contents of the write buffer are stored. Writes to the underlying
    storage are required to be on block boundaries (_blocksize) except for the
    last write. In the worst case the pickled version of this object may be
    slightly larger than the blocksize.

    Returns:
      A dictionary with the state of this object
    """
    return {'api': self._api,
            'path': self._path,
            'path_token': self._path_with_token,
            'buffer': self._buffer,
            'buffered': self._buffered,
            'written': self._written,
            'offset': self._offset,
            'closed': self.closed}

  def __setstate__(self, state):
    """Restore state as part of deserialization/unpickling.

    Args:
      state: the dictionary from a __getstate__ call
    """
    self._api = state['api']
    self._path_with_token = state['path_token']
    self._buffer = state['buffer']
    self._buffered = state['buffered']
    self._written = state['written']
    self._offset = state['offset']
    self.closed = state['closed']
    self._path = state['path']
    self.name = api_utils._unquote_filename(self._path)

  def write(self, data):
    """Write some bytes.

    Args:
      data: data to write. str.

    Raises:
      TypeError: if data is not of type str.
    """
    self._check_open()
    if not isinstance(data, str):
      raise TypeError('Expected str but got %s.' % type(data))
    if not data:
      return
    self._buffer.append(data)
    self._buffered += len(data)
    self._offset += len(data)
    if self._buffered >= self._flushsize:
      self._flush()

  def flush(self):
    """Flush as much as possible to GCS.

    GCS *requires* that all writes except for the final one align on
    256KB boundaries. So the internal buffer may still have < 256KB bytes left
    after flush.
    """
    self._check_open()
    self._flush(finish=False)
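
  # Worked example of the alignment rule, for illustration: with _blocksize =
  # 256 * 1024 = 262144 bytes, calling flush() with 600000 bytes buffered
  # sends a single PUT of 524288 bytes (two whole blocks, 'content-range:
  # bytes 0-524287/*') and keeps the remaining 75712 bytes buffered until the
  # next flush() or close().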

  def tell(self):
    """Return the total number of bytes passed to write() so far.

    (There is no seek() method.)
    """
    return self._offset

  def close(self):
    """Flush the buffer and finalize the file.

    When this returns the new file is available for reading.
    """
    if not self.closed:
      self.closed = True
      self._flush(finish=True)
      self._buffer = None

  def __enter__(self):
    return self

  def __exit__(self, atype, value, traceback):
    self.close()
    return False

  def _flush(self, finish=False):
    """Internal API to flush.

    Buffer is flushed to GCS only when the total amount of buffered data is at
    least self._blocksize, or to flush the final (incomplete) block of
    the file with finish=True.
    """
    while ((finish and self._buffered >= 0) or
           (not finish and self._buffered >= self._blocksize)):
      tmp_buffer = []
      tmp_buffer_len = 0

      excess = 0
      while self._buffer:
        buf = self._buffer.popleft()
        size = len(buf)
        self._buffered -= size
        tmp_buffer.append(buf)
        tmp_buffer_len += size
        if tmp_buffer_len >= self._maxrequestsize:
          excess = tmp_buffer_len - self._maxrequestsize
          break
        if not finish and (
            tmp_buffer_len % self._blocksize + self._buffered <
            self._blocksize):
          excess = tmp_buffer_len % self._blocksize
          break

      if excess:
        over = tmp_buffer.pop()
        size = len(over)
        assert size >= excess
        tmp_buffer_len -= size
        head, tail = over[:-excess], over[-excess:]
        self._buffer.appendleft(tail)
        self._buffered += len(tail)
        if head:
          tmp_buffer.append(head)
          tmp_buffer_len += len(head)

      data = ''.join(tmp_buffer)
      file_len = '*'
      if finish and not self._buffered:
        file_len = self._written + len(data)
      self._send_data(data, self._written, file_len)
      self._written += len(data)
      if file_len != '*':
        break

  def _send_data(self, data, start_offset, file_len):
    """Send the block to the storage service.

    This is a utility method that does not modify self.

    Args:
      data: data to send in str.
      start_offset: start offset of the data in relation to the file.
      file_len: an int if this is the last data to append to the file.
        Otherwise '*'.
    """
    headers = {}
    end_offset = start_offset + len(data) - 1

    if data:
      headers['content-range'] = ('bytes %d-%d/%s' %
                                  (start_offset, end_offset, file_len))
    else:
      headers['content-range'] = ('bytes */%s' % file_len)

    status, response_headers, content = self._api.put_object(
        self._path_with_token, payload=data, headers=headers)
    if file_len == '*':
      expected = 308
    else:
      expected = 200
    errors.check_status(status, [expected], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})

  def _get_offset_from_gcs(self):
    """Get the last offset that has been written to GCS.

    This is a utility method that does not modify self.

    Returns:
      an int of the last offset written to GCS by this upload, inclusive.
      -1 means nothing has been written.
    """
    headers = {'content-range': 'bytes */*'}
    status, response_headers, content = self._api.put_object(
        self._path_with_token, headers=headers)
    errors.check_status(status, [308], self._path, headers,
                        response_headers, content,
                        {'upload_path': self._path_with_token})
    val = response_headers.get('range')
    if val is None:
      return -1
    _, offset = val.rsplit('-', 1)
    return int(offset)

  def _force_close(self, file_length=None):
    """Close this buffer on file_length.

    Finalize this upload immediately on file_length.
    Contents that are still in memory will not be uploaded.

    This is a utility method that does not modify self.

    Args:
      file_length: file length. Must match what has been uploaded. If None,
        it will be queried from GCS.
    """
    if file_length is None:
      file_length = self._get_offset_from_gcs() + 1
    self._send_data('', 0, file_length)

  def _check_open(self):
    if self.closed:
      raise IOError('Buffer is closed.')

  def seekable(self):
    return False

  def readable(self):
    return False

  def writable(self):
    return True
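
# Illustrative round trip using the two buffers directly ('/mybucket/myfile'
# is a placeholder; real callers normally go through the package-level file
# open helper instead):
#
#   api = _get_storage_api(retry_params=None)
#   with StreamingBuffer(api, '/mybucket/myfile', content_type='text/plain') as f:
#     f.write('hello world\n')
#   with ReadBuffer(api, '/mybucket/myfile') as f:
#     print f.read()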