"""
Module containing lakeFS reference implementation
"""

from __future__ import annotations

import base64
import binascii
import io
import json
import os
import tempfile
import urllib.parse
from abc import abstractmethod
from typing import AnyStr, IO, Iterator, List, Literal, Optional, Union, get_args

import lakefs_sdk
from lakefs_sdk import StagingMetadata

from lakefs.client import Client, _BaseLakeFSObject
from lakefs.exceptions import (
    api_exception_handler,
    handle_http_error,
    LakeFSException,
    NotFoundException,
    ObjectNotFoundException,
    NotAuthorizedException,
    ForbiddenException,
    PermissionException,
    ObjectExistsException,
    InvalidRangeException,
)
from lakefs.models import ObjectInfo

_LAKEFS_METADATA_PREFIX = "x-lakefs-meta-"
# _WRITER_BUFFER_SIZE - Writer buffer size. While the buffer size is not exceeded, data is kept in memory and
# no temporary file is created on disk.
_WRITER_BUFFER_SIZE = 32 * 1024 * 1024

ReadModes = Literal['r', 'rb']
WriteModes = Literal['x', 'xb', 'w', 'wb']
AllModes = Union[ReadModes, WriteModes]


class LakeFSIOBase(_BaseLakeFSObject, IO):
    """
    Base class for the lakeFS Reader and Writer classes
    """
    _obj: StoredObject
    _mode: AllModes
    _pos: int
    _pre_sign: Optional[bool] = None

    def __init__(self, obj: StoredObject, mode: AllModes, pre_sign: Optional[bool] = None,
                 client: Optional[Client] = None) -> None:
        self._obj = obj
        self._mode = mode
        self._pos = 0
        super().__init__(client)
        # must be set after super().__init__ to ensure the client is properly initialized.
        self._pre_sign = pre_sign if pre_sign is not None else self._client.storage_config.pre_sign_support

    @property
    def mode(self) -> str:
        """
        Returns the open mode for this object
        """
        return self._mode

    @property
    def name(self) -> str:
        """
        Returns the name of the object relative to the repo and reference
        """
        return self._obj.path

    def close(self) -> None:
        """
        Finalizes any existing operations on object and close open descriptors
        Inheriting classes need to override close() as needed.
        """

    @abstractmethod
    def _abort(self) -> None:
        """
        Specific implementation for when the IO object should be discarded
        """
        raise NotImplementedError

    def fileno(self) -> int:
        """
        The file descriptor number as defined by the operating system. In the context of lakeFS it has no meaning

        :raise io.UnsupportedOperation: Always, since fileno is not supported for lakeFS objects
        """
        raise io.UnsupportedOperation

    @abstractmethod
    def flush(self) -> None:
        raise NotImplementedError

    def isatty(self) -> bool:
        """
        Irrelevant for the lakeFS implementation
        """
        return False

    @abstractmethod
    def readable(self) -> bool:
        raise NotImplementedError

    def readline(self, limit: int = -1):
        """
        Must be explicitly implemented by inheriting class
        """
        raise io.UnsupportedOperation

    def readlines(self, hint: int = -1):
        """
        Must be explicitly implemented by inheriting class
        """
        raise io.UnsupportedOperation

    @abstractmethod
    def seekable(self) -> bool:
        raise NotImplementedError

    def truncate(self, size: Optional[int] = None) -> int:
        """
        Unsupported by lakeFS implementation
        """
        raise io.UnsupportedOperation

    @abstractmethod
    def writable(self) -> bool:
        raise NotImplementedError

    @abstractmethod
    def write(self, s: AnyStr) -> int:
        raise NotImplementedError

    def writelines(self, lines: List[AnyStr]) -> None:
        """
        Unsupported by lakeFS implementation
        """
        raise io.UnsupportedOperation

    def __next__(self) -> AnyStr:
        line = self.readline()
        if len(line) == 0:
            raise StopIteration
        return line

    def __iter__(self) -> Iterator[AnyStr]:
        return self

    def __enter__(self) -> LakeFSIOBase:
        return self

    def __exit__(self, typ, value, traceback) -> bool:
        if typ is None:  # Perform logic in case no exception was raised
            self.close()

        else:
            self._abort()  # perform logic regardless of exception

        return False  # Don't suppress an exception

    @abstractmethod
    def seek(self, offset: int, whence: int = 0) -> int:
        raise NotImplementedError

    @abstractmethod
    def read(self, n: Optional[int] = None) -> str | bytes:
        raise NotImplementedError

    def tell(self) -> int:
        """
        For readers - read position, for writers can be used as an indication for bytes written
        """
        return self._pos


class ObjectReader(LakeFSIOBase):
    """
    ObjectReader provides read-only functionality for lakeFS objects with IO semantics.
    This Object is instantiated and returned for immutable reference types (Commit, Tag...)
    """
    _readlines_buf: io.BytesIO

    def __init__(self, obj: StoredObject, mode: ReadModes, pre_sign: Optional[bool] = None,
                 client: Optional[Client] = None) -> None:
        if mode not in get_args(ReadModes):
            raise ValueError(f"invalid read mode: '{mode}'. ReadModes: {ReadModes}")

        super().__init__(obj, mode, pre_sign, client)
        self._readlines_buf = io.BytesIO(b"")
        self._is_closed = False

    @property
    def pre_sign(self):
        """
        Returns whether the pre_sign mode is enabled
        """
        if self._pre_sign is None:
            self._pre_sign = self._client.storage_config.pre_sign_support
        return self._pre_sign

    @pre_sign.setter
    def pre_sign(self, value: bool) -> None:
        """
        Set the pre_sign mode to value

        :param value: The new value for pre_sign mode
        """
        self._pre_sign = value

    @property
    def closed(self) -> bool:
        """
        Returns True after the object is closed
        """
        return self._is_closed

    def readable(self) -> bool:
        """
        Returns True always
        """
        return True

    def write(self, s: AnyStr) -> int:
        """
        Unsupported for reader object
        """
        raise io.UnsupportedOperation

    def seekable(self) -> bool:
        """
        Returns True always
        """
        return True

    def writable(self) -> bool:
        """
        Unsupported - read only object
        """
        return False

    def seek(self, offset: int, whence: int = 0) -> int:
        """
        Move the object's reading position

        :param offset: The offset from the beginning of the file
        :param whence: Optional. The whence argument is optional and defaults to
            os.SEEK_SET or 0 (absolute file positioning);
            other values are os.SEEK_CUR or 1 (seek relative to the current position) and os.SEEK_END or 2
            (seek relative to the file's end)
        :raise ValueError: if reader is closed
        :raise OSError: if calculated new position is negative
        :raise io.UnsupportedOperation: If whence value is unsupported
        """
        if self._is_closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            pos = offset
        elif whence == os.SEEK_CUR:
            pos = self._pos + offset
        elif whence == os.SEEK_END:
            size = self._obj.stat().size_bytes  # Seek end requires us to know the size of the file
            pos = size + offset
        else:
            raise io.UnsupportedOperation(f"whence={whence} is not supported")

        if pos < 0:
            raise OSError("position must be a non-negative integer")
        self._pos = pos
        return pos

    def _cast_by_mode(self, retval):
        # Text mode ('r') returns str, binary mode ('rb') returns the raw bytes
        if 'b' not in self.mode:
            return retval.decode('utf-8')
        return retval

    def _read(self, read_range: str) -> str | bytes:
        try:
            with api_exception_handler(_io_exception_handler):
                return self._client.sdk_client.objects_api.get_object(self._obj.repo,
                                                                      self._obj.ref,
                                                                      self._obj.path,
                                                                      range=read_range,
                                                                      presign=self.pre_sign)

        except InvalidRangeException:
            # This is done in order to behave like the built-in open() function
            return b''

    def read(self, n: Optional[int] = None) -> str | bytes:
        """
        Read object data

        :param n: How many bytes to read. If n is None, will read from current position to end.
            If current position + n exceeds the object size, reads until the end of the object.
        :return: The bytes read
        :raise ValueError: if reader is closed
        :raise OSError: if read_bytes is non-positive
        :raise ObjectNotFoundException: if repository id, reference id or object path does not exist
        :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
        :raise ServerException: for any other errors
        """
        if self._is_closed:
            raise ValueError("I/O operation on closed file")

        # 'n is not None' (rather than truthiness) so that n == 0 is rejected per the documented contract
        if n is not None and n <= 0:
            raise OSError("read_bytes must be a positive integer")

        read_range = self._get_range_string(start=self._pos, read_bytes=n)
        contents = self._read(read_range)
        self._pos += len(contents)  # Update pointer position

        return self._cast_by_mode(contents)

    def readline(self, limit: int = -1):
        """
        Read and return a line from the stream.

        :param limit: If limit > -1 returns at most limit bytes
        :raise ValueError: if reader is closed
        :raise ObjectNotFoundException: if repository id, reference id or object path does not exist
        :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
        :raise ServerException: for any other errors
        """
        if self._is_closed:
            raise ValueError("I/O operation on closed file")

        # Lazily fetch the whole object into a local buffer on first readline call
        if self._readlines_buf.getbuffer().nbytes == 0:
            self._readlines_buf = io.BytesIO(self._read(self._get_range_string(0)))
        self._readlines_buf.seek(self._pos)
        line = self._readlines_buf.readline(limit)
        self._pos = self._readlines_buf.tell()
        return self._cast_by_mode(line)

    def flush(self) -> None:
        """
        Nothing to do for reader

        :raise ValueError: if reader is closed
        """
        if self._is_closed:
            raise ValueError("I/O operation on closed file")

    def close(self) -> None:
        """
        Close open descriptors
        """
        if self._is_closed:
            return

        self._is_closed = True
        self._readlines_buf.close()

    def _abort(self):
        """
        Closes reader
        """
        self.close()

    @staticmethod
    def _get_range_string(start, read_bytes=None):
        # None means "whole object" - the API treats a missing Range header as a full read
        if start == 0 and read_bytes is None:
            return None
        if read_bytes is None:
            return f"bytes={start}-"
        return f"bytes={start}-{start + read_bytes - 1}"

    def __str__(self):
        return self._obj.path

    def __repr__(self):
        return f'ObjectReader(path="{self._obj.path}")'


class ObjectWriter(LakeFSIOBase):
    """
    ObjectWriter provides write-only functionality for lakeFS objects with IO semantics.
    This Object is instantiated and returned from the WriteableObject writer method.
    For the data to be actually written to the lakeFS server the close() method must be invoked explicitly or
    implicitly when using writer as a context.
    """
    _fd: tempfile.SpooledTemporaryFile
    _obj_stats: ObjectInfo = None

    def __init__(self,
                 obj: StoredObject,
                 mode: WriteModes,
                 pre_sign: Optional[bool] = None,
                 content_type: Optional[str] = None,
                 metadata: Optional[dict[str, str]] = None,
                 client: Optional[Client] = None) -> None:

        # Validate the mode before anything else so an invalid mode fails fast,
        # without performing the server round-trip that exists() requires
        if mode not in get_args(WriteModes):
            raise ValueError(f"invalid write mode: '{mode}'. WriteModes: {WriteModes}")

        if 'x' in mode and obj.exists():  # Requires explicit create
            raise ObjectExistsException

        self.content_type = content_type
        self.metadata = metadata

        open_kwargs = {
            "encoding": "utf-8" if 'b' not in mode else None,
            "mode": 'wb+' if 'b' in mode else 'w+',
            "max_size": _WRITER_BUFFER_SIZE,
        }
        self._fd = tempfile.SpooledTemporaryFile(**open_kwargs)  # pylint: disable=consider-using-with
        super().__init__(obj, mode, pre_sign, client)

    @property
    def pre_sign(self) -> bool:
        """
        Returns whether the pre_sign mode is enabled
        """
        if self._pre_sign is None:
            self._pre_sign = self._client.storage_config.pre_sign_support
        return self._pre_sign

    @pre_sign.setter
    def pre_sign(self, value: bool) -> None:
        """
        Set the pre_sign mode to value

        :param value: The new value for pre_sign mode
        """
        self._pre_sign = value

    @property
    def closed(self) -> bool:
        """
        Returns True after the object is closed
        """
        return self._fd.closed

    def flush(self) -> None:
        """
        Flush buffer to file. Prevent flush if total write size is still smaller than _WRITER_BUFFER_SIZE so that
        we avoid unnecessary write to disk.

        :raise ValueError: if writer is closed
        """

        if self._fd.closed:
            raise ValueError("I/O operation on closed file")

        # Don't flush buffer to file if we didn't exceed buffer size
        # We want to avoid using the file if possible
        if self._pos > _WRITER_BUFFER_SIZE:
            self._fd.flush()

    def write(self, s: AnyStr) -> int:
        """
        Write data to buffer

        :param s: The data to write
        :return: The number of bytes written to buffer
        :raise ValueError: if writer is closed
        """
        # Transparently convert between str and bytes based on the writer's open mode
        binary_mode = 'b' in self._mode
        if binary_mode and isinstance(s, str):
            contents = s.encode('utf-8')
        elif not binary_mode and isinstance(s, bytes):
            contents = s.decode('utf-8')
        else:
            contents = s

        count = self._fd.write(contents)
        self._pos += count
        return count

    def discard(self) -> None:
        """
        Discards of the write buffer and closes writer
        """
        self._abort()

    def close(self) -> None:
        """
        Write the data to the lakeFS server
        """
        if self._fd.closed:
            return

        stats = self._upload_presign() if self.pre_sign else self._upload_raw()
        self._obj_stats = ObjectInfo(**stats.dict())
        self._fd.close()

    def _abort(self) -> None:
        """
        Close open descriptors but create nothing on lakeFS.
        """
        if not self._fd.closed:
            self._fd.close()

    @staticmethod
    def _extract_etag_from_response(headers) -> str:
        # prefer Content-MD5 if exists
        content_md5 = headers.get("Content-MD5")
        if content_md5 is not None and len(content_md5) > 0:
            try:  # decode base64, return as hex
                decode_md5 = base64.b64decode(content_md5)
                return binascii.hexlify(decode_md5).decode("utf-8")
            except binascii.Error:
                pass

        # fallback to ETag
        etag = headers.get("ETag", "").strip(' "')
        return etag

    def _upload_raw(self) -> lakefs_sdk.ObjectStats:
        """
        Use raw upload API call to bypass validation of content parameter
        """
        auth_settings = ['basic_auth', 'cookie_auth', 'oidc_auth', 'saml_auth', 'jwt_token']
        headers = {
            "Accept": "application/json",
            "Content-Type": self.content_type if self.content_type is not None else "application/octet-stream"
        }

        # Create user metadata headers
        if self.metadata is not None:
            for k, v in self.metadata.items():
                headers[_LAKEFS_METADATA_PREFIX + k] = v

        self._fd.seek(0)
        resource_path = urllib.parse.quote(f"/repositories/{self._obj.repo}/branches/{self._obj.ref}/objects",
                                           encoding="utf-8")
        query_params = urllib.parse.urlencode({"path": self._obj.path}, encoding="utf-8")
        url = self._client.config.host + resource_path + f"?{query_params}"
        self._client.sdk_client.objects_api.api_client.update_params_for_auth(headers, None, auth_settings,
                                                                              resource_path, "POST", self._fd)
        resp = self._client.sdk_client.objects_api.api_client.rest_client.pool_manager.request(url=url,
                                                                                               method="POST",
                                                                                               headers=headers,
                                                                                               body=self._fd)

        handle_http_error(resp)
        return lakefs_sdk.ObjectStats(**json.loads(resp.data))

    def _upload_presign(self) -> lakefs_sdk.ObjectStats:
        # Ask the server for a pre-signed physical address, PUT the data directly to the
        # underlying blockstore, then link the physical address back to the lakeFS path
        staging_location = self._client.sdk_client.staging_api.get_physical_address(self._obj.repo,
                                                                                    self._obj.ref,
                                                                                    self._obj.path,
                                                                                    True)
        url = staging_location.presigned_url

        headers = {"Content-Length": self._pos}
        if self.content_type:
            headers["Content-Type"] = self.content_type
        if self._client.storage_config.blockstore_type == "azure":
            headers["x-ms-blob-type"] = "BlockBlob"

        self._fd.seek(0)
        resp = self._client.sdk_client.staging_api.api_client.rest_client.pool_manager.request(method="PUT",
                                                                                               url=url,
                                                                                               body=self._fd,
                                                                                               headers=headers)
        handle_http_error(resp)

        etag = ObjectWriter._extract_etag_from_response(resp.headers)
        size_bytes = self._pos
        staging_metadata = StagingMetadata(staging=staging_location,
                                           size_bytes=size_bytes,
                                           checksum=etag,
                                           user_metadata=self.metadata,
                                           content_type=self.content_type)
        return self._client.sdk_client.staging_api.link_physical_address(self._obj.repo,
                                                                         self._obj.ref,
                                                                         self._obj.path,
                                                                         staging_metadata=staging_metadata)

    def readable(self) -> bool:
        """
        ObjectWriter is write-only - return False always
        """
        return False

    def seekable(self) -> bool:
        """
        ObjectWriter is not seekable. Returns False always
        """
        return False

    def writable(self) -> bool:
        """
        Returns True always
        """
        return True

    def seek(self, offset: int, whence: int = 0) -> int:
        """
        Unsupported for writer class
        """
        raise io.UnsupportedOperation

    def read(self, n: Optional[int] = None) -> str | bytes:
        """
        Unsupported for writer class
        """
        raise io.UnsupportedOperation

    def __repr__(self):
        return f'ObjectWriter(path="{self._obj.path}")'


class StoredObject(_BaseLakeFSObject):
    """
    Class representing an object in lakeFS.
    """
    _repo_id: str
    _ref_id: str
    _path: str
    _stats: Optional[ObjectInfo] = None

    def __init__(self, repository_id: str, reference_id: str, path: str, client: Optional[Client] = None):
        self._repo_id = repository_id
        self._ref_id = reference_id
        self._path = path
        super().__init__(client)

    def __str__(self) -> str:
        return self.path

    def __repr__(self):
        return f'StoredObject(repository="{self.repo}", reference="{self.ref}", path="{self.path}")'

    @property
    def repo(self) -> str:
        """
        Returns the object's repository id
        """
        return self._repo_id

    @property
    def ref(self) -> str:
        """
        Returns the object's reference id
        """
        return self._ref_id

    @property
    def path(self) -> str:
        """
        Returns the object's path relative to repository and reference ids
        """
        return self._path

    def reader(self, mode: ReadModes = 'rb', pre_sign: Optional[bool] = None) -> ObjectReader:
        """
        Context manager which provide a file-descriptor like object that allow reading the given object.

        Usage Example:

        .. code-block:: python

            import lakefs

            obj = lakefs.repository("<repository_name>").branch("<branch_name>").object("file.txt")
            file_size = obj.stat().size_bytes

            with obj.reader(mode='r', pre_sign=True) as fd:
                # print every other 10 chars
                while fd.tell() < file_size:
                    print(fd.read(10))
                    fd.seek(10, os.SEEK_CUR)

        :param mode: Read mode - as supported by ReadModes
        :param pre_sign: (Optional), enforce the pre_sign mode on the lakeFS server. If not set, will probe server for
            information.
        :return: A Reader object
        """
        return ObjectReader(self, mode=mode, pre_sign=pre_sign, client=self._client)

    def stat(self) -> ObjectInfo:
        """
        Return the Stat object representing this object
        """
        if self._stats is None:
            with api_exception_handler(_io_exception_handler):
                stat = self._client.sdk_client.objects_api.stat_object(self._repo_id, self._ref_id, self._path)
                self._stats = ObjectInfo(**stat.dict())
        return self._stats

    def exists(self) -> bool:
        """
        Returns True if object exists in lakeFS, False otherwise
        """

        exists = False

        def exist_handler(e: LakeFSException):
            if isinstance(e, NotFoundException):
                return None  # exists = False
            return _io_exception_handler(e)

        with api_exception_handler(exist_handler):
            self._client.sdk_client.objects_api.head_object(self._repo_id, self._ref_id, self._path)
            exists = True

        return exists

    def copy(self, destination_branch_id: str, destination_path: str) -> WriteableObject:
        """
        Copy the object to a destination branch

        :param destination_branch_id: The destination branch to copy the object to
        :param destination_path: The path of the copied object in the destination branch
        :return: The newly copied Object
        :raise ObjectNotFoundException: if repo id,reference id, destination branch id or object path does not exist
        :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
        :raise ServerException: for any other errors
        """

        with api_exception_handler():
            object_copy_creation = lakefs_sdk.ObjectCopyCreation(src_ref=self._ref_id, src_path=self._path)
            self._client.sdk_client.objects_api.copy_object(repository=self._repo_id,
                                                            branch=destination_branch_id,
                                                            dest_path=destination_path,
                                                            object_copy_creation=object_copy_creation)

        return WriteableObject(repository_id=self._repo_id, reference_id=destination_branch_id, path=destination_path,
                               client=self._client)


class WriteableObject(StoredObject):
    """
    WriteableObject inherits from StoredObject and provides read/write functionality for lakeFS objects
    using IO semantics.
    This Object is instantiated and returned upon invoking writer() on Branch reference type.
    """

    def __init__(self, repository_id: str, reference_id: str, path: str,
                 client: Optional[Client] = None) -> None:
        super().__init__(repository_id, reference_id, path, client=client)

    def __repr__(self):
        return f'WriteableObject(repository="{self.repo}", reference="{self.ref}", path="{self.path}")'

    def upload(self,
               data: str | bytes,
               mode: WriteModes = 'w',
               pre_sign: Optional[bool] = None,
               content_type: Optional[str] = None,
               metadata: Optional[dict[str, str]] = None) -> WriteableObject:
        """
        Upload a new object or overwrites an existing object

        :param data: The contents of the object to write (can be bytes or string)
        :param mode: Write mode:

            'x' - Open for exclusive creation

            'xb' - Open for exclusive creation in binary mode

            'w' - Create a new object or truncate if exists

            'wb' - Create or truncate in binary mode
        :param pre_sign: (Optional) Explicitly state whether to use pre_sign mode when uploading the object.
            If None, will be taken from pre_sign property.
        :param content_type: (Optional) Explicitly set the object Content-Type
        :param metadata: (Optional) User metadata
        :return: The Stat object representing the newly created object
        :raise ObjectExistsException: if object exists and mode is exclusive ('x')
        :raise ObjectNotFoundException: if repo id, reference id or object path does not exist
        :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
        :raise ServerException: for any other errors
        """
        with ObjectWriter(self, mode, pre_sign, content_type, metadata, self._client) as writer:
            writer.write(data)

        return self

    def delete(self) -> None:
        """
        Delete object from lakeFS

        :raise ObjectNotFoundException: if repo id, reference id or object path does not exist
        :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
        :raise ServerException: for any other errors
        """
        with api_exception_handler(_io_exception_handler):
            self._client.sdk_client.objects_api.delete_object(self._repo_id, self._ref_id, self._path)
            self._stats = None

    def writer(self,
               mode: WriteModes = 'wb',
               pre_sign: Optional[bool] = None,
               content_type: Optional[str] = None,
               metadata: Optional[dict[str, str]] = None) -> ObjectWriter:
        """
        Context manager which provide a file-descriptor like object that allow writing the given object to lakeFS
        The writes are saved in a buffer as long as the writer is open. Only when it closes it writes the data into
        lakeFS. The optional parameters can be modified by accessing the respective fields as long as the writer is
        still open.

        Usage example of reading a file from local file system and writing it to lakeFS:

        .. code-block:: python

            import lakefs

            obj = lakefs.repository("<repository_name>").branch("<branch_name>").object("my_image")

            with open("my_local_image", mode='rb') as reader, obj.writer("wb") as writer:
                writer.write(reader.read())

        :param mode: Write mode - as supported by WriteModes
        :param pre_sign: (Optional), enforce the pre_sign mode on the lakeFS server. If not set, will probe server for
            information.
        :param content_type: (Optional) Specify the data media type
        :param metadata: (Optional) User defined metadata to save on the object
        :return: A Writer object
        """
        return ObjectWriter(self,
                            mode=mode,
                            pre_sign=pre_sign,
                            content_type=content_type,
                            metadata=metadata,
                            client=self._client)


def _io_exception_handler(e: LakeFSException):
    # Translate generic lakeFS errors into the object-oriented exception types callers expect
    if isinstance(e, NotFoundException):
        return ObjectNotFoundException(e.status_code, e.reason)
    if isinstance(e, (NotAuthorizedException, ForbiddenException)):
        return PermissionException(e.status_code, e.reason)
    return e