github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/prolly_map.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package message 16 17 import ( 18 "context" 19 "encoding/binary" 20 "fmt" 21 22 fb "github.com/dolthub/flatbuffers/v23/go" 23 24 "github.com/dolthub/dolt/go/gen/fb/serial" 25 "github.com/dolthub/dolt/go/store/hash" 26 "github.com/dolthub/dolt/go/store/pool" 27 "github.com/dolthub/dolt/go/store/val" 28 ) 29 30 const ( 31 // These constants are mirrored from serial.ProllyTreeNode.KeyOffsets() 32 // and serial.ProllyTreeNode.ValueOffsets() respectively. 33 // They are only as stable as the flatbuffers schema that define them. 34 prollyMapKeyItemBytesVOffset fb.VOffsetT = 4 35 prollyMapKeyOffsetsVOffset fb.VOffsetT = 6 36 prollyMapValueItemBytesVOffset fb.VOffsetT = 10 37 prollyMapValueOffsetsVOffset fb.VOffsetT = 12 38 prollyMapAddressArrayBytesVOffset fb.VOffsetT = 18 39 ) 40 41 var prollyMapFileID = []byte(serial.ProllyTreeNodeFileID) 42 43 func NewProllyMapSerializer(valueDesc val.TupleDesc, pool pool.BuffPool) ProllyMapSerializer { 44 return ProllyMapSerializer{valDesc: valueDesc, pool: pool} 45 } 46 47 type ProllyMapSerializer struct { 48 valDesc val.TupleDesc 49 pool pool.BuffPool 50 } 51 52 var _ Serializer = ProllyMapSerializer{} 53 54 func (s ProllyMapSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message { 55 var ( 56 keyTups, keyOffs fb.UOffsetT 57 valTups, valOffs fb.UOffsetT 58 valAddrOffs fb.UOffsetT 59 refArr, cardArr fb.UOffsetT 60 ) 61 62 keySz, valSz, bufSz := estimateProllyMapSize(keys, values, subtrees, s.valDesc.AddressFieldCount()) 63 b := getFlatbufferBuilder(s.pool, bufSz) 64 65 // serialize keys and offStart 66 keyTups = writeItemBytes(b, keys, keySz) 67 serial.ProllyTreeNodeStartKeyOffsetsVector(b, len(keys)+1) 68 keyOffs = writeItemOffsets(b, keys, keySz) 69 70 if level == 0 { 71 // serialize value tuples for leaf nodes 72 valTups = writeItemBytes(b, values, valSz) 73 serial.ProllyTreeNodeStartValueOffsetsVector(b, len(values)+1) 74 valOffs = writeItemOffsets(b, values, valSz) 75 // serialize offStart of chunk addresses within |valTups| 76 if s.valDesc.AddressFieldCount() > 0 { 77 serial.ProllyTreeNodeStartValueAddressOffsetsVector(b, countAddresses(values, s.valDesc)) 78 valAddrOffs = writeAddressOffsets(b, values, valSz, s.valDesc) 79 } 80 } else { 81 // serialize child refs and subtree counts for internal nodes 82 refArr = writeItemBytes(b, values, valSz) 83 cardArr = writeCountArray(b, subtrees) 84 } 85 86 // populate the node's vtable 87 serial.ProllyTreeNodeStart(b) 88 serial.ProllyTreeNodeAddKeyItems(b, keyTups) 89 serial.ProllyTreeNodeAddKeyOffsets(b, keyOffs) 90 if level == 0 { 91 serial.ProllyTreeNodeAddValueItems(b, valTups) 92 serial.ProllyTreeNodeAddValueOffsets(b, valOffs) 93 serial.ProllyTreeNodeAddTreeCount(b, uint64(len(keys))) 94 serial.ProllyTreeNodeAddValueAddressOffsets(b, valAddrOffs) 95 } else { 96 serial.ProllyTreeNodeAddAddressArray(b, refArr) 97 serial.ProllyTreeNodeAddSubtreeCounts(b, cardArr) 98 serial.ProllyTreeNodeAddTreeCount(b, sumSubtrees(subtrees)) 99 } 100 serial.ProllyTreeNodeAddKeyType(b, serial.ItemTypeTupleFormatAlpha) 101 serial.ProllyTreeNodeAddValueType(b, serial.ItemTypeTupleFormatAlpha) 102 serial.ProllyTreeNodeAddTreeLevel(b, uint8(level)) 103 104 return serial.FinishMessage(b, serial.ProllyTreeNodeEnd(b), prollyMapFileID) 105 } 106 107 func getProllyMapKeysAndValues(msg serial.Message) (keys, values ItemAccess, level, count uint16, err error) { 108 var pm serial.ProllyTreeNode 109 err = serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz) 110 if err != nil { 111 return 112 } 113 keys.bufStart = lookupVectorOffset(prollyMapKeyItemBytesVOffset, pm.Table()) 114 keys.bufLen = uint16(pm.KeyItemsLength()) 115 keys.offStart = lookupVectorOffset(prollyMapKeyOffsetsVOffset, pm.Table()) 116 keys.offLen = uint16(pm.KeyOffsetsLength() * uint16Size) 117 118 count = (keys.offLen / 2) - 1 119 level = uint16(pm.TreeLevel()) 120 121 vv := pm.ValueItemsBytes() 122 if vv != nil { 123 values.bufStart = lookupVectorOffset(prollyMapValueItemBytesVOffset, pm.Table()) 124 values.bufLen = uint16(pm.ValueItemsLength()) 125 values.offStart = lookupVectorOffset(prollyMapValueOffsetsVOffset, pm.Table()) 126 values.offLen = uint16(pm.ValueOffsetsLength() * uint16Size) 127 } else { 128 values.bufStart = lookupVectorOffset(prollyMapAddressArrayBytesVOffset, pm.Table()) 129 values.bufLen = uint16(pm.AddressArrayLength()) 130 values.itemWidth = hash.ByteLen 131 } 132 return 133 } 134 135 func walkProllyMapAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error { 136 var pm serial.ProllyTreeNode 137 err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz) 138 if err != nil { 139 return err 140 } 141 arr := pm.AddressArrayBytes() 142 for i := 0; i < len(arr)/hash.ByteLen; i++ { 143 addr := hash.New(arr[i*addrSize : (i+1)*addrSize]) 144 if err := cb(ctx, addr); err != nil { 145 return err 146 } 147 } 148 149 cnt := pm.ValueAddressOffsetsLength() 150 arr2 := pm.ValueItemsBytes() 151 for i := 0; i < cnt; i++ { 152 o := pm.ValueAddressOffsets(i) 153 addr := hash.New(arr2[o : o+addrSize]) 154 if err := cb(ctx, addr); err != nil { 155 return err 156 } 157 } 158 assertFalse((arr != nil) && (arr2 != nil), "cannot WalkAddresses for ProllyTreeNode with both AddressArray and ValueAddressOffsets") 159 return nil 160 } 161 162 func getProllyMapCount(msg serial.Message) (uint16, error) { 163 var pm serial.ProllyTreeNode 164 err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz) 165 if err != nil { 166 return 0, err 167 } 168 return uint16(pm.KeyOffsetsLength() - 1), nil 169 } 170 171 func getProllyMapTreeLevel(msg serial.Message) (int, error) { 172 var pm serial.ProllyTreeNode 173 err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz) 174 if err != nil { 175 return 0, fb.ErrTableHasUnknownFields 176 } 177 return int(pm.TreeLevel()), nil 178 } 179 180 func getProllyMapTreeCount(msg serial.Message) (int, error) { 181 var pm serial.ProllyTreeNode 182 err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz) 183 if err != nil { 184 return 0, fb.ErrTableHasUnknownFields 185 } 186 return int(pm.TreeCount()), nil 187 } 188 189 func getProllyMapSubtrees(msg serial.Message) ([]uint64, error) { 190 sz, err := getProllyMapCount(msg) 191 if err != nil { 192 return nil, err 193 } 194 195 var pm serial.ProllyTreeNode 196 n := fb.GetUOffsetT(msg[serial.MessagePrefixSz:]) 197 pm.Init(msg, serial.MessagePrefixSz+n) 198 if serial.ProllyTreeNodeNumFields < pm.Table().NumFields() { 199 return nil, fb.ErrTableHasUnknownFields 200 } 201 202 counts := make([]uint64, sz) 203 204 return decodeVarints(pm.SubtreeCountsBytes(), counts), nil 205 } 206 207 // estimateProllyMapSize returns the exact Size of the tuple vectors for keys and values, 208 // and an estimate of the overall Size of the final flatbuffer. 209 func estimateProllyMapSize(keys, values [][]byte, subtrees []uint64, valAddrsCnt int) (int, int, int) { 210 var keySz, valSz, bufSz int 211 for i := range keys { 212 keySz += len(keys[i]) 213 valSz += len(values[i]) 214 } 215 subtreesSz := len(subtrees) * binary.MaxVarintLen64 216 217 // constraints enforced upstream 218 if keySz > int(MaxVectorOffset) { 219 panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset)) 220 } 221 if valSz > int(MaxVectorOffset) { 222 panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset)) 223 } 224 225 bufSz += keySz + valSz // tuples 226 bufSz += subtreesSz // subtree counts 227 bufSz += len(keys)*2 + len(values)*2 // offStart 228 bufSz += 8 + 1 + 1 + 1 // metadata 229 bufSz += 72 // vtable (approx) 230 bufSz += 100 // padding? 231 bufSz += valAddrsCnt * len(values) * 2 232 bufSz += serial.MessagePrefixSz 233 234 return keySz, valSz, bufSz 235 }