github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/merge_artifacts.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package message 16 17 import ( 18 "context" 19 "encoding/binary" 20 "fmt" 21 22 fb "github.com/dolthub/flatbuffers/v23/go" 23 24 "github.com/dolthub/dolt/go/gen/fb/serial" 25 "github.com/dolthub/dolt/go/store/hash" 26 "github.com/dolthub/dolt/go/store/pool" 27 "github.com/dolthub/dolt/go/store/val" 28 ) 29 30 const ( 31 // These constants are mirrored from serial.MergeArtifacts.KeyOffsets() 32 // and serial.MergeArtifacts.ValueOffsets() respectively. 33 // They are only as stable as the flatbuffers schema that define them. 34 35 mergeArtifactKeyItemBytesVOffset fb.VOffsetT = 4 36 mergeArtifactKeyOffsetsVOffset fb.VOffsetT = 6 37 mergeArtifactValueItemBytesVOffset fb.VOffsetT = 10 38 mergeArtifactValueOffsetsVOffset fb.VOffsetT = 12 39 mergeArtifactAddressArrayVOffset fb.VOffsetT = 14 40 ) 41 42 var mergeArtifactFileID = []byte(serial.MergeArtifactsFileID) 43 44 func NewMergeArtifactSerializer(keyDesc val.TupleDesc, pool pool.BuffPool) MergeArtifactSerializer { 45 return MergeArtifactSerializer{ 46 keyDesc: keyDesc, 47 pool: pool, 48 } 49 } 50 51 type MergeArtifactSerializer struct { 52 keyDesc val.TupleDesc 53 pool pool.BuffPool 54 } 55 56 var _ Serializer = MergeArtifactSerializer{} 57 58 func (s MergeArtifactSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message { 59 var ( 60 keyTups, keyOffs fb.UOffsetT 61 valTups, valOffs fb.UOffsetT 62 keyAddrOffs fb.UOffsetT 63 refArr, cardArr fb.UOffsetT 64 ) 65 66 keySz, valSz, bufSz := estimateMergeArtifactSize(keys, values, subtrees, s.keyDesc.AddressFieldCount()) 67 b := getFlatbufferBuilder(s.pool, bufSz) 68 69 // serialize keys and offStart 70 keyTups = writeItemBytes(b, keys, keySz) 71 serial.MergeArtifactsStartKeyOffsetsVector(b, len(keys)+1) 72 keyOffs = writeItemOffsets(b, keys, keySz) 73 74 if level == 0 { 75 // serialize value tuples for leaf nodes 76 valTups = writeItemBytes(b, values, valSz) 77 serial.MergeArtifactsStartValueOffsetsVector(b, len(values)+1) 78 valOffs = writeItemOffsets(b, values, valSz) 79 // serialize offStart of chunk addresses within |keyTups| 80 if s.keyDesc.AddressFieldCount() > 0 { 81 serial.MergeArtifactsStartKeyAddressOffsetsVector(b, countAddresses(keys, s.keyDesc)) 82 keyAddrOffs = writeAddressOffsets(b, keys, keySz, s.keyDesc) 83 } 84 } else { 85 // serialize child refs and subtree counts for internal nodes 86 refArr = writeItemBytes(b, values, valSz) 87 cardArr = writeCountArray(b, subtrees) 88 } 89 90 // populate the node's vtable 91 serial.MergeArtifactsStart(b) 92 serial.MergeArtifactsAddKeyItems(b, keyTups) 93 serial.MergeArtifactsAddKeyOffsets(b, keyOffs) 94 if level == 0 { 95 serial.MergeArtifactsAddValueItems(b, valTups) 96 serial.MergeArtifactsAddValueOffsets(b, valOffs) 97 serial.MergeArtifactsAddTreeCount(b, uint64(len(keys))) 98 serial.MergeArtifactsAddKeyAddressOffsets(b, keyAddrOffs) 99 } else { 100 serial.MergeArtifactsAddAddressArray(b, refArr) 101 serial.MergeArtifactsAddSubtreeCounts(b, cardArr) 102 serial.MergeArtifactsAddTreeCount(b, sumSubtrees(subtrees)) 103 } 104 serial.MergeArtifactsAddTreeLevel(b, uint8(level)) 105 106 return serial.FinishMessage(b, serial.MergeArtifactsEnd(b), mergeArtifactFileID) 107 } 108 109 func getArtifactMapKeysAndValues(msg serial.Message) (keys, values ItemAccess, level, count uint16, err error) { 110 var ma serial.MergeArtifacts 111 err = serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 112 if err != nil { 113 return 114 } 115 keys.bufStart = lookupVectorOffset(mergeArtifactKeyItemBytesVOffset, ma.Table()) 116 keys.bufLen = uint16(ma.KeyItemsLength()) 117 keys.offStart = lookupVectorOffset(mergeArtifactKeyOffsetsVOffset, ma.Table()) 118 keys.offLen = uint16(ma.KeyOffsetsLength() * uint16Size) 119 120 count = (keys.offLen / 2) - 1 121 level = uint16(ma.TreeLevel()) 122 123 vv := ma.ValueItemsBytes() 124 if vv != nil { 125 values.bufStart = lookupVectorOffset(mergeArtifactValueItemBytesVOffset, ma.Table()) 126 values.bufLen = uint16(ma.ValueItemsLength()) 127 values.offStart = lookupVectorOffset(mergeArtifactValueOffsetsVOffset, ma.Table()) 128 values.offLen = uint16(ma.ValueOffsetsLength() * uint16Size) 129 } else { 130 values.bufStart = lookupVectorOffset(mergeArtifactAddressArrayVOffset, ma.Table()) 131 values.bufLen = uint16(ma.AddressArrayLength()) 132 values.itemWidth = hash.ByteLen 133 } 134 return 135 } 136 137 func walkMergeArtifactAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error { 138 var ma serial.MergeArtifacts 139 err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 140 if err != nil { 141 return err 142 } 143 arr := ma.AddressArrayBytes() 144 for i := 0; i < len(arr)/hash.ByteLen; i++ { 145 addr := hash.New(arr[i*addrSize : (i+1)*addrSize]) 146 if err := cb(ctx, addr); err != nil { 147 return err 148 } 149 } 150 151 cnt := ma.KeyAddressOffsetsLength() 152 arr2 := ma.KeyItemsBytes() 153 for i := 0; i < cnt; i++ { 154 o := ma.KeyAddressOffsets(i) 155 addr := hash.New(arr2[o : o+addrSize]) 156 if err := cb(ctx, addr); err != nil { 157 return err 158 } 159 } 160 161 return nil 162 } 163 164 func getMergeArtifactCount(msg serial.Message) (uint16, error) { 165 var ma serial.MergeArtifacts 166 err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 167 if err != nil { 168 return 0, err 169 } 170 if ma.KeyItemsLength() == 0 { 171 return 0, nil 172 } 173 // zeroth offset ommitted from array 174 return uint16(ma.KeyOffsetsLength() + 1), nil 175 } 176 177 func getMergeArtifactTreeLevel(msg serial.Message) (int, error) { 178 var ma serial.MergeArtifacts 179 err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 180 if err != nil { 181 return 0, err 182 } 183 return int(ma.TreeLevel()), nil 184 } 185 186 func getMergeArtifactTreeCount(msg serial.Message) (int, error) { 187 var ma serial.MergeArtifacts 188 err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 189 if err != nil { 190 return 0, err 191 } 192 return int(ma.TreeCount()), nil 193 } 194 195 func getMergeArtifactSubtrees(msg serial.Message) ([]uint64, error) { 196 sz, err := getMergeArtifactCount(msg) 197 if err != nil { 198 return nil, err 199 } 200 counts := make([]uint64, sz) 201 var ma serial.MergeArtifacts 202 err = serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz) 203 if err != nil { 204 return nil, err 205 } 206 return decodeVarints(ma.SubtreeCountsBytes(), counts), nil 207 } 208 209 // estimateMergeArtifact>Size returns the exact Size of the tuple vectors for keys and values, 210 // and an estimate of the overall Size of the final flatbuffer. 211 func estimateMergeArtifactSize(keys, values [][]byte, subtrees []uint64, keyAddrs int) (int, int, int) { 212 var keySz, valSz, bufSz int 213 for i := range keys { 214 keySz += len(keys[i]) 215 valSz += len(values[i]) 216 } 217 refCntSz := len(subtrees) * binary.MaxVarintLen64 218 219 // constraints enforced upstream 220 if keySz > int(MaxVectorOffset) { 221 panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset)) 222 } 223 if valSz > int(MaxVectorOffset) { 224 panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset)) 225 } 226 227 bufSz += keySz + valSz // tuples 228 bufSz += refCntSz // subtree counts 229 bufSz += len(keys)*2 + len(values)*2 // offStart 230 bufSz += 8 + 1 + 1 + 1 // metadata 231 bufSz += 72 // vtable (approx) 232 bufSz += 100 // padding? 233 bufSz += keyAddrs * len(keys) * 2 234 bufSz += serial.MessagePrefixSz 235 236 return keySz, valSz, bufSz 237 }