github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/value.go (about)

     1  package committed
     2  
     3  import (
     4  	"encoding/binary"
     5  	"errors"
     6  	"fmt"
     7  
     8  	"github.com/treeverse/lakefs/pkg/graveler"
     9  )
    10  
// ErrBadValueBytes is the error returned (possibly wrapped) when unmarshalling bytes that are
// supposed to encode a Value fails because they are not in the expected format.
    13  var ErrBadValueBytes = errors.New("bad bytes format for graveler.Value")
    14  
    15  // ErrTooLong is an error that is returned when trying to marshal too long a key or value.
    16  // This should never normally happen in graveler files generated by graveler.
    17  var ErrTooLong = errors.New("too long")
    18  
// MaxValueComponentBytes is the longest size allowed for the data length of a graveler value
// (or its identity, but that is controlled by code here, so less likely).  It (only) protects
// the process from unbounded serialization.  "640 KB should be enough for anyone" - even at a
// few 10s of KiBs you may be better served with some other format or implementation.  Note
// that the limit actually enforced is 640 << 16 bytes (40 MiB), not 640 KB.
    23  const MaxValueComponentBytes = 640 << 16
    24  
    25  /*
    26   * Value is serialized in a trivial fixed-order format:
    27   *
    28   *    | len(Identity) | Identity | len(Value) | Value |
    29   *
    30   * where each length is serialized as a varint and additional bytes after Value are silently
    31   * ignored.
    32   */
    33  
    34  func varintBytes(i int) []byte {
    35  	e := make([]byte, binary.MaxVarintLen64)
    36  	l := binary.PutVarint(e, int64(i))
    37  	return e[:l]
    38  }
    39  
    40  func putBytes(buf *[]byte, b []byte) {
    41  	*buf = append(*buf, varintBytes(len(b))...)
    42  	*buf = append(*buf, b...)
    43  }
    44  
    45  // MarshalValue returns bytes that uniquely unmarshal into a Value equal to v.
    46  func MarshalValue(v *graveler.Value) ([]byte, error) {
    47  	if len(v.Identity) > MaxValueComponentBytes || len(v.Data) > MaxValueComponentBytes {
    48  		return nil, ErrTooLong
    49  	}
    50  	ret := make([]byte, 0, len(v.Identity)+len(v.Data)+2*binary.MaxVarintLen32)
    51  	putBytes(&ret, v.Identity)
    52  	putBytes(&ret, v.Data)
    53  	return ret, nil
    54  }
    55  
    56  // MustMarshalValue a MarshalValue that will panic on error
    57  func MustMarshalValue(v *graveler.Value) []byte {
    58  	val, err := MarshalValue(v)
    59  	if err != nil {
    60  		panic(err)
    61  	}
    62  	return val
    63  }
    64  
    65  // splitBytes splits a given byte slice into two: the first part defined by the interpreted length (provided in the
    66  // slice), and the second part is the remainder of bytes from the slice
    67  func splitBytes(b []byte) ([]byte, []byte, error) {
    68  	l, o := binary.Varint(b)
    69  	if o < 0 {
    70  		return nil, nil, fmt.Errorf("read length: %w", ErrBadValueBytes)
    71  	}
    72  	remainedBuf := b[o:]
    73  	if len(remainedBuf) < int(l) {
    74  		return nil, nil, fmt.Errorf("not enough bytes to read %d bytes: %w", l, ErrBadValueBytes)
    75  	}
    76  	if l < 0 {
    77  		return nil, nil, fmt.Errorf("impossible negative length %d: %w", l, ErrBadValueBytes)
    78  	}
    79  	value := remainedBuf[:l]
    80  	rest := remainedBuf[l:]
    81  	return value, rest, nil
    82  }
    83  
    84  func UnmarshalValue(b []byte) (*graveler.Value, error) {
    85  	ret := &graveler.Value{}
    86  	var err error
    87  	data := b
    88  	if ret.Identity, data, err = splitBytes(data); err != nil {
    89  		return nil, fmt.Errorf("identity field: %w", err)
    90  	}
    91  	if ret.Data, _, err = splitBytes(data); err != nil {
    92  		return nil, fmt.Errorf("data field: %w", err)
    93  	}
    94  	return ret, nil
    95  }