github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/graveler/committed/value.go (about) 1 package committed 2 3 import ( 4 "encoding/binary" 5 "errors" 6 "fmt" 7 8 "github.com/treeverse/lakefs/pkg/graveler" 9 ) 10 11 // ErrBadValueBytes is an error that is probably returned when unmarshalling bytes that are 12 // supposed to encode a Value. 13 var ErrBadValueBytes = errors.New("bad bytes format for graveler.Value") 14 15 // ErrTooLong is an error that is returned when trying to marshal too long a key or value. 16 // This should never normally happen in graveler files generated by graveler. 17 var ErrTooLong = errors.New("too long") 18 19 // MaxValueComponentBytes is the longest size allowed for the data length of a graveler value 20 // (or its identity, but that is controlled by code here, so less likely). It (only) protects 21 // the process from unbounded serialization. "640 KB should be enough for anyone" - even at a 22 // few 10s of KiBs you may be better served with some other format or implementation. 23 const MaxValueComponentBytes = 640 << 16 24 25 /* 26 * Value is serialized in a trivial fixed-order format: 27 * 28 * | len(Identity) | Identity | len(Value) | Value | 29 * 30 * where each length is serialized as a varint and additional bytes after Value are silently 31 * ignored. 32 */ 33 34 func varintBytes(i int) []byte { 35 e := make([]byte, binary.MaxVarintLen64) 36 l := binary.PutVarint(e, int64(i)) 37 return e[:l] 38 } 39 40 func putBytes(buf *[]byte, b []byte) { 41 *buf = append(*buf, varintBytes(len(b))...) 42 *buf = append(*buf, b...) 43 } 44 45 // MarshalValue returns bytes that uniquely unmarshal into a Value equal to v. 46 func MarshalValue(v *graveler.Value) ([]byte, error) { 47 if len(v.Identity) > MaxValueComponentBytes || len(v.Data) > MaxValueComponentBytes { 48 return nil, ErrTooLong 49 } 50 ret := make([]byte, 0, len(v.Identity)+len(v.Data)+2*binary.MaxVarintLen32) 51 putBytes(&ret, v.Identity) 52 putBytes(&ret, v.Data) 53 return ret, nil 54 } 55 56 // MustMarshalValue a MarshalValue that will panic on error 57 func MustMarshalValue(v *graveler.Value) []byte { 58 val, err := MarshalValue(v) 59 if err != nil { 60 panic(err) 61 } 62 return val 63 } 64 65 // splitBytes splits a given byte slice into two: the first part defined by the interpreted length (provided in the 66 // slice), and the second part is the remainder of bytes from the slice 67 func splitBytes(b []byte) ([]byte, []byte, error) { 68 l, o := binary.Varint(b) 69 if o < 0 { 70 return nil, nil, fmt.Errorf("read length: %w", ErrBadValueBytes) 71 } 72 remainedBuf := b[o:] 73 if len(remainedBuf) < int(l) { 74 return nil, nil, fmt.Errorf("not enough bytes to read %d bytes: %w", l, ErrBadValueBytes) 75 } 76 if l < 0 { 77 return nil, nil, fmt.Errorf("impossible negative length %d: %w", l, ErrBadValueBytes) 78 } 79 value := remainedBuf[:l] 80 rest := remainedBuf[l:] 81 return value, rest, nil 82 } 83 84 func UnmarshalValue(b []byte) (*graveler.Value, error) { 85 ret := &graveler.Value{} 86 var err error 87 data := b 88 if ret.Identity, data, err = splitBytes(data); err != nil { 89 return nil, fmt.Errorf("identity field: %w", err) 90 } 91 if ret.Data, _, err = splitBytes(data); err != nil { 92 return nil, fmt.Errorf("data field: %w", err) 93 } 94 return ret, nil 95 }