github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/clients/python-wrapper/lakefs/object.py (about)

     1  """
Module containing lakeFS object implementation
     3  """
     4  
     5  from __future__ import annotations
     6  
     7  import base64
     8  import binascii
     9  import io
    10  import json
    11  import os
    12  import tempfile
    13  import urllib.parse
    14  from abc import abstractmethod
    15  from typing import AnyStr, IO, Iterator, List, Literal, Optional, Union, get_args
    16  
    17  import lakefs_sdk
    18  from lakefs_sdk import StagingMetadata
    19  
    20  from lakefs.client import Client, _BaseLakeFSObject
    21  from lakefs.exceptions import (
    22      api_exception_handler,
    23      handle_http_error,
    24      LakeFSException,
    25      NotFoundException,
    26      ObjectNotFoundException,
    27      NotAuthorizedException,
    28      ForbiddenException,
    29      PermissionException,
    30      ObjectExistsException,
    31      InvalidRangeException,
    32  )
    33  from lakefs.models import ObjectInfo
    34  
    35  _LAKEFS_METADATA_PREFIX = "x-lakefs-meta-"
# _WRITER_BUFFER_SIZE - Writer buffer size. While the buffer size is not exceeded, data is kept in memory and no
#                       temporary file is created.
    38  _WRITER_BUFFER_SIZE = 32 * 1024 * 1024
    39  
    40  ReadModes = Literal['r', 'rb']
    41  WriteModes = Literal['x', 'xb', 'w', 'wb']
    42  AllModes = Union[ReadModes, WriteModes]
    43  
    44  
    45  class LakeFSIOBase(_BaseLakeFSObject, IO):
    46      """
    47      Base class for the lakeFS Reader and Writer classes
    48      """
    49      _obj: StoredObject
    50      _mode: AllModes
    51      _pos: int
    52      _pre_sign: Optional[bool] = None
    53  
    54      def __init__(self, obj: StoredObject, mode: AllModes, pre_sign: Optional[bool] = None,
    55                   client: Optional[Client] = None) -> None:
    56          self._obj = obj
    57          self._mode = mode
    58          self._pos = 0
    59          super().__init__(client)
    60          # must be set after super().__init__ to ensure the client is properly initialized.
    61          self._pre_sign = pre_sign if pre_sign is not None else self._client.storage_config.pre_sign_support
    62  
    63      @property
    64      def mode(self) -> str:
    65          """
    66          Returns the open mode for this object
    67          """
    68          return self._mode
    69  
    70      @property
    71      def name(self) -> str:
    72          """
    73          Returns the name of the object relative to the repo and reference
    74          """
    75          return self._obj.path
    76  
    77      def close(self) -> None:
    78          """
    79          Finalizes any existing operations on object and close open descriptors
    80          Inheriting classes need to override close() as needed.
    81          """
    82  
    83      @abstractmethod
    84      def _abort(self) -> None:
    85          """
    86          Specific implementation for when the IO object should be discarded
    87          """
    88          raise NotImplementedError
    89  
    90      def fileno(self) -> int:
    91          """
    92          The file descriptor number as defined by the operating system. In the context of lakeFS it has no meaning
    93  
    94          :raise io.UnsupportedOperation: Always, since fileno is not supported for lakeFS objects
    95          """
    96          raise io.UnsupportedOperation
    97  
    98      @abstractmethod
    99      def flush(self) -> None:
   100          raise NotImplementedError
   101  
   102      def isatty(self) -> bool:
   103          """
   104          Irrelevant for the lakeFS implementation
   105          """
   106          return False
   107  
   108      @abstractmethod
   109      def readable(self) -> bool:
   110          raise NotImplementedError
   111  
   112      def readline(self, limit: int = -1):
   113          """
   114          Must be explicitly implemented by inheriting class
   115          """
   116          raise io.UnsupportedOperation
   117  
   118      def readlines(self, hint: int = -1):
   119          """
   120          Must be explicitly implemented by inheriting class
   121          """
   122          raise io.UnsupportedOperation
   123  
   124      @abstractmethod
   125      def seekable(self) -> bool:
   126          raise NotImplementedError
   127  
   128      def truncate(self, size: int = None) -> int:
   129          """
   130          Unsupported by lakeFS implementation
   131          """
   132          raise io.UnsupportedOperation
   133  
   134      @abstractmethod
   135      def writable(self) -> bool:
   136          raise NotImplementedError
   137  
   138      @abstractmethod
   139      def write(self, s: AnyStr) -> int:
   140          raise NotImplementedError
   141  
   142      def writelines(self, lines: List[AnyStr]) -> None:
   143          """
   144          Unsupported by lakeFS implementation
   145          """
   146          raise io.UnsupportedOperation
   147  
   148      def __next__(self) -> AnyStr:
   149          line = self.readline()
   150          if len(line) == 0:
   151              raise StopIteration
   152          return line
   153  
   154      def __iter__(self) -> Iterator[AnyStr]:
   155          return self
   156  
   157      def __enter__(self) -> LakeFSIOBase:
   158          return self
   159  
   160      def __exit__(self, typ, value, traceback) -> bool:
   161          if typ is None:  # Perform logic in case no exception was raised
   162              self.close()
   163  
   164          else:
   165              self._abort()  # perform logic regardless of exception
   166  
   167          return False  # Don't suppress an exception
   168  
   169      @abstractmethod
   170      def seek(self, offset: int, whence: int = 0) -> int:
   171          raise NotImplementedError
   172  
   173      @abstractmethod
   174      def read(self, n: int = None) -> str | bytes:
   175          raise NotImplementedError
   176  
   177      def tell(self) -> int:
   178          """
   179          For readers - read position, for writers can be used as an indication for bytes written
   180          """
   181          return self._pos
   182  
   183  
   184  class ObjectReader(LakeFSIOBase):
   185      """
   186      ObjectReader provides read-only functionality for lakeFS objects with IO semantics.
   187      This Object is instantiated and returned for immutable reference types (Commit, Tag...)
   188      """
   189      _readlines_buf: io.BytesIO
   190  
   191      def __init__(self, obj: StoredObject, mode: ReadModes, pre_sign: Optional[bool] = None,
   192                   client: Optional[Client] = None) -> None:
   193          if mode not in get_args(ReadModes):
   194              raise ValueError(f"invalid read mode: '{mode}'. ReadModes: {ReadModes}")
   195  
   196          super().__init__(obj, mode, pre_sign, client)
   197          self._readlines_buf = io.BytesIO(b"")
   198          self._is_closed = False
   199  
   200      @property
   201      def pre_sign(self):
   202          """
   203          Returns whether the pre_sign mode is enabled
   204          """
   205          if self._pre_sign is None:
   206              self._pre_sign = self._client.storage_config.pre_sign_support
   207          return self._pre_sign
   208  
   209      @pre_sign.setter
   210      def pre_sign(self, value: bool) -> None:
   211          """
   212          Set the pre_sign mode to value
   213  
   214          :param value: The new value for pre_sign mode
   215          """
   216          self._pre_sign = value
   217  
   218      @property
   219      def closed(self) -> bool:
   220          """
   221          Returns True after the object is closed
   222          """
   223          return self._is_closed
   224  
   225      def readable(self) -> bool:
   226          """
   227          Returns True always
   228          """
   229          return True
   230  
   231      def write(self, s: AnyStr) -> int:
   232          """
   233          Unsupported for reader object
   234          """
   235          raise io.UnsupportedOperation
   236  
   237      def seekable(self) -> bool:
   238          """
   239          Returns True always
   240          """
   241          return True
   242  
   243      def writable(self) -> bool:
   244          """
   245          Unsupported - read only object
   246          """
   247          return False
   248  
   249      def seek(self, offset: int, whence: int = 0) -> int:
   250          """
   251          Move the object's reading position
   252  
   253          :param offset: The offset from the beginning of the file
   254          :param whence: Optional. The whence argument is optional and defaults to
   255              os.SEEK_SET or 0 (absolute file positioning);
   256              other values are os.SEEK_CUR or 1 (seek relative to the current position) and os.SEEK_END or 2
   257              (seek relative to the file’s end)
   258          :raise ValueError: if reader is closed
   259          :raise OSError: if calculated new position is negative
   260          :raise io.UnsupportedOperation: If whence value is unsupported
   261          """
   262          if self._is_closed:
   263              raise ValueError("I/O operation on closed file")
   264  
   265          if whence == os.SEEK_SET:
   266              pos = offset
   267          elif whence == os.SEEK_CUR:
   268              pos = self._pos + offset
   269          elif whence == os.SEEK_END:
   270              size = self._obj.stat().size_bytes  # Seek end requires us to know the size of the file
   271              pos = size + offset
   272          else:
   273              raise io.UnsupportedOperation(f"whence={whence} is not supported")
   274  
   275          if pos < 0:
   276              raise OSError("position must be a non-negative integer")
   277          self._pos = pos
   278          return pos
   279  
   280      def _cast_by_mode(self, retval):
   281          if 'b' not in self.mode:
   282              return retval.decode('utf-8')
   283          return retval
   284  
   285      def _read(self, read_range: str) -> str | bytes:
   286          try:
   287              with api_exception_handler(_io_exception_handler):
   288                  return self._client.sdk_client.objects_api.get_object(self._obj.repo,
   289                                                                        self._obj.ref,
   290                                                                        self._obj.path,
   291                                                                        range=read_range,
   292                                                                        presign=self.pre_sign)
   293  
   294          except InvalidRangeException:
   295              # This is done in order to behave like the built-in open() function
   296              return b''
   297  
   298      def read(self, n: int = None) -> str | bytes:
   299          """
   300          Read object data
   301  
   302          :param n: How many bytes to read. If read_bytes is None, will read from current position to end.
   303              If current position + read_bytes > object size.
   304          :return: The bytes read
   305          :raise ValueError: if reader is closed
   306          :raise OSError: if read_bytes is non-positive
   307          :raise ObjectNotFoundException: if repository id, reference id or object path does not exist
   308          :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
   309          :raise ServerException: for any other errors
   310          """
   311          if self._is_closed:
   312              raise ValueError("I/O operation on closed file")
   313  
   314          if n and n <= 0:
   315              raise OSError("read_bytes must be a positive integer")
   316  
   317          read_range = self._get_range_string(start=self._pos, read_bytes=n)
   318          contents = self._read(read_range)
   319          self._pos += len(contents)  # Update pointer position
   320  
   321          return self._cast_by_mode(contents)
   322  
   323      def readline(self, limit: int = -1):
   324          """
   325          Read and return a line from the stream.
   326  
   327          :param limit: If limit > -1 returns at most limit bytes
   328          :raise ValueError: if reader is closed
   329          :raise ObjectNotFoundException: if repository id, reference id or object path does not exist
   330          :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
   331          :raise ServerException: for any other errors
   332          """
   333          if self._is_closed:
   334              raise ValueError("I/O operation on closed file")
   335  
   336          if self._readlines_buf.getbuffer().nbytes == 0:
   337              self._readlines_buf = io.BytesIO(self._read(self._get_range_string(0)))
   338          self._readlines_buf.seek(self._pos)
   339          line = self._readlines_buf.readline(limit)
   340          self._pos = self._readlines_buf.tell()
   341          return self._cast_by_mode(line)
   342  
   343      def flush(self) -> None:
   344          """
   345          Nothing to do for reader
   346  
   347          :raise ValueError: if reader is closed
   348          """
   349          if self._is_closed:
   350              raise ValueError("I/O operation on closed file")
   351  
   352      def close(self) -> None:
   353          """
   354          Close open descriptors
   355          """
   356          if self._is_closed:
   357              return
   358  
   359          self._is_closed = True
   360          self._readlines_buf.close()
   361  
   362      def _abort(self):
   363          """
   364          Closes reader
   365          """
   366          self.close()
   367  
   368      @staticmethod
   369      def _get_range_string(start, read_bytes=None):
   370          if start == 0 and read_bytes is None:
   371              return None
   372          if read_bytes is None:
   373              return f"bytes={start}-"
   374          return f"bytes={start}-{start + read_bytes - 1}"
   375  
   376      def __str__(self):
   377          return self._obj.path
   378  
   379      def __repr__(self):
   380          return f'ObjectReader(path="{self._obj.path}")'
   381  
   382  
   383  class ObjectWriter(LakeFSIOBase):
   384      """
   385      ObjectWriter provides write-only functionality for lakeFS objects with IO semantics.
   386      This Object is instantiated and returned from the WriteableObject writer method.
   387      For the data to be actually written to the lakeFS server the close() method must be invoked explicitly or
   388      implicitly when using writer as a context.
   389      """
   390      _fd: tempfile.SpooledTemporaryFile
   391      _obj_stats: ObjectInfo = None
   392  
   393      def __init__(self,
   394                   obj: StoredObject,
   395                   mode: WriteModes,
   396                   pre_sign: Optional[bool] = None,
   397                   content_type: Optional[str] = None,
   398                   metadata: Optional[dict[str, str]] = None,
   399                   client: Optional[Client] = None) -> None:
   400  
   401          if 'x' in mode and obj.exists():  # Requires explicit create
   402              raise ObjectExistsException
   403  
   404          if mode not in get_args(WriteModes):
   405              raise ValueError(f"invalid write mode: '{mode}'. WriteModes: {WriteModes}")
   406  
   407          self.content_type = content_type
   408          self.metadata = metadata
   409  
   410          open_kwargs = {
   411              "encoding": "utf-8" if 'b' not in mode else None,
   412              "mode": 'wb+' if 'b' in mode else 'w+',
   413              "max_size": _WRITER_BUFFER_SIZE,
   414          }
   415          self._fd = tempfile.SpooledTemporaryFile(**open_kwargs)  # pylint: disable=consider-using-with
   416          super().__init__(obj, mode, pre_sign, client)
   417  
   418      @property
   419      def pre_sign(self) -> bool:
   420          """
   421          Returns whether the pre_sign mode is enabled
   422          """
   423          if self._pre_sign is None:
   424              self._pre_sign = self._client.storage_config.pre_sign_support
   425          return self._pre_sign
   426  
   427      @pre_sign.setter
   428      def pre_sign(self, value: bool) -> None:
   429          """
   430          Set the pre_sign mode to value
   431  
   432          :param value: The new value for pre_sign mode
   433          """
   434          self._pre_sign = value
   435  
   436      @property
   437      def closed(self) -> bool:
   438          """
   439          Returns True after the object is closed
   440          """
   441          return self._fd.closed
   442  
   443      def flush(self) -> None:
   444          """
   445          Flush buffer to file. Prevent flush if total write size is still smaller than _BUFFER_SIZE so that we avoid
   446          unnecessary write to disk.
   447  
   448          :raise ValueError: if writer is closed
   449          """
   450  
   451          if self._fd.closed:
   452              raise ValueError("I/O operation on closed file")
   453  
   454          # Don't flush buffer to file if we didn't exceed buffer size
   455          # We want to avoid using the file if possible
   456          if self._pos > _WRITER_BUFFER_SIZE:
   457              self._fd.flush()
   458  
   459      def write(self, s: AnyStr) -> int:
   460          """
   461          Write data to buffer
   462  
   463          :param s: The data to write
   464          :return: The number of bytes written to buffer
   465          :raise ValueError: if writer is closed
   466          """
   467          binary_mode = 'b' in self._mode
   468          if binary_mode and isinstance(s, str):
   469              contents = s.encode('utf-8')
   470          elif not binary_mode and isinstance(s, bytes):
   471              contents = s.decode('utf-8')
   472          else:
   473              contents = s
   474  
   475          count = self._fd.write(contents)
   476          self._pos += count
   477          return count
   478  
   479      def discard(self) -> None:
   480          """
   481          Discards of the write buffer and closes writer
   482          """
   483          self._abort()
   484  
   485      def close(self) -> None:
   486          """
   487          Write the data to the lakeFS server
   488          """
   489          if self._fd.closed:
   490              return
   491  
   492          stats = self._upload_presign() if self.pre_sign else self._upload_raw()
   493          self._obj_stats = ObjectInfo(**stats.dict())
   494          self._fd.close()
   495  
   496      def _abort(self) -> None:
   497          """
   498          Close open descriptors but create nothing on lakeFS.
   499          """
   500          if not self._fd.closed:
   501              self._fd.close()
   502  
   503      @staticmethod
   504      def _extract_etag_from_response(headers) -> str:
   505          # prefer Content-MD5 if exists
   506          content_md5 = headers.get("Content-MD5")
   507          if content_md5 is not None and len(content_md5) > 0:
   508              try:  # decode base64, return as hex
   509                  decode_md5 = base64.b64decode(content_md5)
   510                  return binascii.hexlify(decode_md5).decode("utf-8")
   511              except binascii.Error:
   512                  pass
   513  
   514          # fallback to ETag
   515          etag = headers.get("ETag", "").strip(' "')
   516          return etag
   517  
   518      def _upload_raw(self) -> lakefs_sdk.ObjectStats:
   519          """
   520          Use raw upload API call to bypass validation of content parameter
   521          """
   522          auth_settings = ['basic_auth', 'cookie_auth', 'oidc_auth', 'saml_auth', 'jwt_token']
   523          headers = {
   524              "Accept": "application/json",
   525              "Content-Type": self.content_type if self.content_type is not None else "application/octet-stream"
   526          }
   527  
   528          # Create user metadata headers
   529          if self.metadata is not None:
   530              for k, v in self.metadata.items():
   531                  headers[_LAKEFS_METADATA_PREFIX + k] = v
   532  
   533          self._fd.seek(0)
   534          resource_path = urllib.parse.quote(f"/repositories/{self._obj.repo}/branches/{self._obj.ref}/objects",
   535                                             encoding="utf-8")
   536          query_params = urllib.parse.urlencode({"path": self._obj.path}, encoding="utf-8")
   537          url = self._client.config.host + resource_path + f"?{query_params}"
   538          self._client.sdk_client.objects_api.api_client.update_params_for_auth(headers, None, auth_settings,
   539                                                                                resource_path, "POST", self._fd)
   540          resp = self._client.sdk_client.objects_api.api_client.rest_client.pool_manager.request(url=url,
   541                                                                                                 method="POST",
   542                                                                                                 headers=headers,
   543                                                                                                 body=self._fd)
   544  
   545          handle_http_error(resp)
   546          return lakefs_sdk.ObjectStats(**json.loads(resp.data))
   547  
   548      def _upload_presign(self) -> lakefs_sdk.ObjectStats:
   549          staging_location = self._client.sdk_client.staging_api.get_physical_address(self._obj.repo,
   550                                                                                      self._obj.ref,
   551                                                                                      self._obj.path,
   552                                                                                      True)
   553          url = staging_location.presigned_url
   554  
   555          headers = {"Content-Length": self._pos}
   556          if self.content_type:
   557              headers["Content-Type"] = self.content_type
   558          if self._client.storage_config.blockstore_type == "azure":
   559              headers["x-ms-blob-type"] = "BlockBlob"
   560  
   561          self._fd.seek(0)
   562          resp = self._client.sdk_client.staging_api.api_client.rest_client.pool_manager.request(method="PUT",
   563                                                                                                 url=url,
   564                                                                                                 body=self._fd,
   565                                                                                                 headers=headers)
   566          handle_http_error(resp)
   567  
   568          etag = ObjectWriter._extract_etag_from_response(resp.headers)
   569          size_bytes = self._pos
   570          staging_metadata = StagingMetadata(staging=staging_location,
   571                                             size_bytes=size_bytes,
   572                                             checksum=etag,
   573                                             user_metadata=self.metadata,
   574                                             content_type=self.content_type)
   575          return self._client.sdk_client.staging_api.link_physical_address(self._obj.repo,
   576                                                                           self._obj.ref,
   577                                                                           self._obj.path,
   578                                                                           staging_metadata=staging_metadata)
   579  
   580      def readable(self) -> bool:
   581          """
   582          ObjectWriter is write-only - return False always
   583          """
   584          return False
   585  
   586      def seekable(self) -> bool:
   587          """
   588          ObjectWriter is not seekable. Returns False always
   589          """
   590          return False
   591  
   592      def writable(self) -> bool:
   593          """
   594          Returns True always
   595          """
   596          return True
   597  
   598      def seek(self, offset: int, whence: int = 0) -> int:
   599          """
   600          Unsupported for writer class
   601          """
   602          raise io.UnsupportedOperation
   603  
   604      def read(self, n: int = None) -> str | bytes:
   605          """
   606          Unsupported for writer class
   607          """
   608          raise io.UnsupportedOperation
   609  
   610      def __repr__(self):
   611          return f'ObjectWriter(path="{self._obj.path}")'
   612  
   613  
   614  class StoredObject(_BaseLakeFSObject):
   615      """
   616      Class representing an object in lakeFS.
   617      """
   618      _repo_id: str
   619      _ref_id: str
   620      _path: str
   621      _stats: Optional[ObjectInfo] = None
   622  
   623      def __init__(self, repository_id: str, reference_id: str, path: str, client: Optional[Client] = None):
   624          self._repo_id = repository_id
   625          self._ref_id = reference_id
   626          self._path = path
   627          super().__init__(client)
   628  
   629      def __str__(self) -> str:
   630          return self.path
   631  
   632      def __repr__(self):
   633          return f'StoredObject(repository="{self.repo}", reference="{self.ref}", path="{self.path}")'
   634  
   635      @property
   636      def repo(self) -> str:
   637          """
   638          Returns the object's repository id
   639          """
   640          return self._repo_id
   641  
   642      @property
   643      def ref(self) -> str:
   644          """
   645          Returns the object's reference id
   646          """
   647          return self._ref_id
   648  
   649      @property
   650      def path(self) -> str:
   651          """
   652          Returns the object's path relative to repository and reference ids
   653          """
   654          return self._path
   655  
   656      def reader(self, mode: ReadModes = 'rb', pre_sign: Optional[bool] = None) -> ObjectReader:
   657          """
   658          Context manager which provide a file-descriptor like object that allow reading the given object.
   659  
   660          Usage Example:
   661  
   662          .. code-block:: python
   663  
   664              import lakefs
   665  
   666              obj = lakefs.repository("<repository_name>").branch("<branch_name>").object("file.txt")
   667              file_size = obj.stat().size_bytes
   668  
   669              with obj.reader(mode='r', pre_sign=True) as fd:
   670                  # print every other 10 chars
   671                  while fd.tell() < file_size
   672                      print(fd.read(10))
   673                      fd.seek(10, os.SEEK_CUR)
   674  
   675          :param mode: Read mode - as supported by ReadModes
   676          :param pre_sign: (Optional), enforce the pre_sign mode on the lakeFS server. If not set, will probe server for
   677              information.
   678          :return: A Reader object
   679          """
   680          return ObjectReader(self, mode=mode, pre_sign=pre_sign, client=self._client)
   681  
   682      def stat(self) -> ObjectInfo:
   683          """
   684          Return the Stat object representing this object
   685          """
   686          if self._stats is None:
   687              with api_exception_handler(_io_exception_handler):
   688                  stat = self._client.sdk_client.objects_api.stat_object(self._repo_id, self._ref_id, self._path)
   689                  self._stats = ObjectInfo(**stat.dict())
   690          return self._stats
   691  
   692      def exists(self) -> bool:
   693          """
   694          Returns True if object exists in lakeFS, False otherwise
   695          """
   696  
   697          exists = False
   698  
   699          def exist_handler(e: LakeFSException):
   700              if isinstance(e, NotFoundException):
   701                  return None  # exists = False
   702              return _io_exception_handler(e)
   703  
   704          with api_exception_handler(exist_handler):
   705              self._client.sdk_client.objects_api.head_object(self._repo_id, self._ref_id, self._path)
   706              exists = True
   707  
   708          return exists
   709  
   710      def copy(self, destination_branch_id: str, destination_path: str) -> WriteableObject:
   711          """
   712          Copy the object to a destination branch
   713  
   714          :param destination_branch_id: The destination branch to copy the object to
   715          :param destination_path: The path of the copied object in the destination branch
   716          :return: The newly copied Object
   717          :raise ObjectNotFoundException: if repo id,reference id, destination branch id or object path does not exist
   718          :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
   719          :raise ServerException: for any other errors
   720          """
   721  
   722          with api_exception_handler():
   723              object_copy_creation = lakefs_sdk.ObjectCopyCreation(src_ref=self._ref_id, src_path=self._path)
   724              self._client.sdk_client.objects_api.copy_object(repository=self._repo_id,
   725                                                              branch=destination_branch_id,
   726                                                              dest_path=destination_path,
   727                                                              object_copy_creation=object_copy_creation)
   728  
   729          return WriteableObject(repository_id=self._repo_id, reference_id=destination_branch_id, path=destination_path,
   730                                 client=self._client)
   731  
   732  
   733  class WriteableObject(StoredObject):
   734      """
   735      WriteableObject inherits from ReadableObject and provides read/write functionality for lakeFS objects
   736      using IO semantics.
   737      This Object is instantiated and returned upon invoking writer() on Branch reference type.
   738      """
   739  
   740      def __init__(self, repository_id: str, reference_id: str, path: str,
   741                   client: Optional[Client] = None) -> None:
   742          super().__init__(repository_id, reference_id, path, client=client)
   743  
   744      def __repr__(self):
   745          return f'WriteableObject(repository="{self.repo}", reference="{self.ref}", path="{self.path}")'
   746  
   747      def upload(self,
   748                 data: str | bytes,
   749                 mode: WriteModes = 'w',
   750                 pre_sign: Optional[bool] = None,
   751                 content_type: Optional[str] = None,
   752                 metadata: Optional[dict[str, str]] = None) -> WriteableObject:
   753          """
   754          Upload a new object or overwrites an existing object
   755  
   756          :param data: The contents of the object to write (can be bytes or string)
   757          :param mode: Write mode:
   758  
   759              'x'     - Open for exclusive creation
   760  
   761              'xb'    - Open for exclusive creation in binary mode
   762  
   763              'w'     - Create a new object or truncate if exists
   764  
   765              'wb'    - Create or truncate in binary mode
   766          :param pre_sign: (Optional) Explicitly state whether to use pre_sign mode when uploading the object.
   767              If None, will be taken from pre_sign property.
   768          :param content_type: (Optional) Explicitly set the object Content-Type
   769          :param metadata: (Optional) User metadata
   770          :return: The Stat object representing the newly created object
   771          :raise ObjectExistsException: if object exists and mode is exclusive ('x')
   772          :raise ObjectNotFoundException: if repo id, reference id or object path does not exist
   773          :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
   774          :raise ServerException: for any other errors
   775          """
   776          with ObjectWriter(self, mode, pre_sign, content_type, metadata, self._client) as writer:
   777              writer.write(data)
   778  
   779          return self
   780  
   781      def delete(self) -> None:
   782          """
   783          Delete object from lakeFS
   784  
   785          :raise ObjectNotFoundException: if repo id, reference id or object path does not exist
   786          :raise PermissionException: if user is not authorized to perform this operation, or operation is forbidden
   787          :raise ServerException: for any other errors
   788          """
   789          with api_exception_handler(_io_exception_handler):
   790              self._client.sdk_client.objects_api.delete_object(self._repo_id, self._ref_id, self._path)
   791              self._stats = None
   792  
   793      def writer(self,
   794                 mode: WriteModes = 'wb',
   795                 pre_sign: Optional[bool] = None,
   796                 content_type: Optional[str] = None,
   797                 metadata: Optional[dict[str, str]] = None) -> ObjectWriter:
   798          """
   799          Context manager which provide a file-descriptor like object that allow writing the given object to lakeFS
   800          The writes are saved in a buffer as long as the writer is open. Only when it closes it writes the data into
   801          lakeFS. The optional parameters can be modified by accessing the respective fields as long as the writer is
   802          still open.
   803  
   804          Usage example of reading a file from local file system and writing it to lakeFS:
   805  
   806          .. code-block:: python
   807  
   808              import lakefs
   809  
   810              obj = lakefs.repository("<repository_name>").branch("<branch_name>").object("my_image")
   811  
   812              with open("my_local_image", mode='rb') as reader, obj.writer("wb") as writer:
   813                  writer.write(reader.read())
   814  
   815          :param mode: Write mode - as supported by WriteModes
   816          :param pre_sign: (Optional), enforce the pre_sign mode on the lakeFS server. If not set, will probe server for
   817              information.
   818          :param content_type: (Optional) Specify the data media type
   819          :param metadata: (Optional) User defined metadata to save on the object
   820          :return: A Writer object
   821          """
   822          return ObjectWriter(self,
   823                              mode=mode,
   824                              pre_sign=pre_sign,
   825                              content_type=content_type,
   826                              metadata=metadata,
   827                              client=self._client)
   828  
   829  
   830  def _io_exception_handler(e: LakeFSException):
   831      if isinstance(e, NotFoundException):
   832          return ObjectNotFoundException(e.status_code, e.reason)
   833      if isinstance(e, (NotAuthorizedException, ForbiddenException)):
   834          return PermissionException(e.status_code, e.reason)
   835      return e