github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/blob_builder_test.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 "math" 23 "testing" 24 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 28 "github.com/dolthub/dolt/go/gen/fb/serial" 29 "github.com/dolthub/dolt/go/store/hash" 30 "github.com/dolthub/dolt/go/store/prolly/message" 31 "github.com/dolthub/dolt/go/store/val" 32 ) 33 34 func TestWriteImmutableTree(t *testing.T) { 35 tests := []struct { 36 inputSize int 37 chunkSize int 38 execErr error 39 initErr error 40 checkSum bool 41 }{ 42 { 43 inputSize: 100, 44 chunkSize: 40, 45 }, 46 { 47 inputSize: 100, 48 chunkSize: 100, 49 }, 50 { 51 inputSize: 100, 52 chunkSize: 100, 53 }, 54 { 55 inputSize: 255, 56 chunkSize: 40, 57 }, 58 { 59 inputSize: 243, 60 chunkSize: 40, 61 }, 62 { 63 inputSize: 47, 64 chunkSize: 40, 65 }, 66 { 67 inputSize: 200, 68 chunkSize: 40, 69 }, 70 { 71 inputSize: 200, 72 chunkSize: 40, 73 }, 74 { 75 inputSize: 1, 76 chunkSize: 40, 77 }, 78 { 79 inputSize: 20, 80 chunkSize: 500, 81 }, 82 { 83 inputSize: 1_000, 84 chunkSize: 40, 85 checkSum: false, 86 }, 87 { 88 inputSize: 1_000, 89 chunkSize: 60, 90 checkSum: false, 91 }, 92 { 93 inputSize: 1_000, 94 chunkSize: 80, 95 checkSum: false, 96 }, 97 { 98 inputSize: 10_000, 99 chunkSize: 100, 100 checkSum: false, 101 }, 102 { 103 inputSize: 50_000_000, 104 chunkSize: 4000, 105 checkSum: false, 106 }, 107 { 108 inputSize: 50_000_000, 109 chunkSize: 32_000, 110 checkSum: false, 111 }, 112 { 113 inputSize: 0, 114 chunkSize: 40, 115 }, 116 { 117 inputSize: 100, 118 chunkSize: 41, 119 initErr: ErrInvalidChunkSize, 120 }, 121 } 122 123 for _, tt := range tests { 124 t.Run(fmt.Sprintf("inputSize=%d; chunkSize=%d", tt.inputSize, tt.chunkSize), func(t *testing.T) { 125 buf := make([]byte, tt.inputSize) 126 for i := range buf { 127 buf[i] = byte(i) 128 } 129 ctx := context.Background() 130 r := bytes.NewReader(buf) 131 ns := NewTestNodeStore() 132 //serializer := message.NewBlobSerializer(ns.Pool()) 133 134 b, err := NewBlobBuilder(tt.chunkSize) 135 if tt.initErr != nil { 136 require.True(t, errors.Is(err, tt.initErr)) 137 return 138 } 139 b.SetNodeStore(ns) 140 b.Init(tt.inputSize) 141 root, _, err := b.Chunk(ctx, r) 142 143 if tt.execErr != nil { 144 require.True(t, errors.Is(err, tt.execErr)) 145 return 146 } 147 require.NoError(t, err) 148 149 expSubtrees := expectedSubtrees(tt.inputSize, tt.chunkSize) 150 expLevel := expectedLevel(tt.inputSize, tt.chunkSize) 151 expSum := expectedSum(tt.inputSize) 152 expUnfilled := expectedUnfilled(tt.inputSize, tt.chunkSize) 153 154 intChunkSize := int(math.Ceil(float64(tt.chunkSize) / float64(hash.ByteLen))) 155 156 unfilledCnt := 0 157 sum := 0 158 byteCnt := 0 159 WalkNodes(ctx, root, ns, func(ctx context.Context, n Node) error { 160 if n.empty() { 161 return nil 162 } 163 var keyCnt int 164 leaf := n.IsLeaf() 165 if leaf { 166 byteCnt += len(getBlobValues(n.msg)) 167 for _, i := range n.GetValue(0) { 168 sum += int(i) 169 } 170 keyCnt = len(getBlobValues(n.msg)) 171 if keyCnt != tt.chunkSize { 172 unfilledCnt += 1 173 } 174 } else { 175 keyCnt = n.Count() 176 if keyCnt < intChunkSize { 177 unfilledCnt += 1 178 } 179 } 180 return nil 181 }) 182 183 level := root.Level() 184 assert.Equal(t, expLevel, level) 185 if tt.checkSum { 186 assert.Equal(t, expSum, sum) 187 } 188 assert.Equal(t, tt.inputSize, byteCnt) 189 assert.Equal(t, expUnfilled, unfilledCnt) 190 if expLevel > 0 { 191 root, err = root.loadSubtrees() 192 require.NoError(t, err) 193 for i := range expSubtrees { 194 sc, err := root.getSubtreeCount(i) 195 require.NoError(t, err) 196 assert.Equal(t, expSubtrees[i], sc) 197 } 198 } 199 }) 200 } 201 } 202 203 func expectedLevel(size, chunk int) int { 204 if size <= chunk { 205 return 0 206 } 207 size = int(math.Ceil(float64(size) / float64(chunk))) 208 l := 1 209 intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen))) 210 for size > intChunk { 211 size = int(math.Ceil(float64(size) / float64(intChunk))) 212 l += 1 213 } 214 return l 215 } 216 217 func expectedSubtrees(size, chunk int) subtreeCounts { 218 if size <= chunk { 219 return subtreeCounts{0} 220 } 221 l := expectedLevel(size, chunk) 222 223 size = int(math.Ceil(float64(size) / float64(chunk))) 224 intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen))) 225 226 filledSubtree := int(math.Pow(float64(intChunk), float64(l-1))) 227 228 subtrees := make(subtreeCounts, 0) 229 for size > filledSubtree { 230 subtrees = append(subtrees, uint64(filledSubtree)) 231 size -= filledSubtree 232 } 233 if size > 0 { 234 subtrees = append(subtrees, uint64(size)) 235 } 236 if len(subtrees) > intChunk { 237 panic("unreachable") 238 } 239 return subtrees 240 } 241 242 func expectedSum(size int) int { 243 return (size * (size + 1) / 2) - size 244 } 245 246 func expectedUnfilled(size, chunk int) int { 247 if size == chunk || size == 0 { 248 return 0 249 } else if size < chunk { 250 return 1 251 } 252 253 var unfilled int 254 // level 0 is special case 255 if size%chunk != 0 { 256 unfilled += 1 257 } 258 size = int(math.Ceil(float64(size) / float64(chunk))) 259 260 intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen))) 261 for size > intChunk { 262 if size%intChunk != 0 { 263 unfilled += 1 264 } 265 size = int(math.Ceil(float64(size) / float64(intChunk))) 266 } 267 if size < intChunk { 268 unfilled += 1 269 } 270 return unfilled 271 } 272 273 func TestImmutableTreeWalk(t *testing.T) { 274 tests := []struct { 275 blobLen int 276 chunkSize int 277 keyCnt int 278 }{ 279 { 280 blobLen: 250, 281 chunkSize: 60, 282 keyCnt: 4, 283 }, 284 { 285 blobLen: 250, 286 chunkSize: 40, 287 keyCnt: 4, 288 }, 289 { 290 blobLen: 378, 291 chunkSize: 60, 292 keyCnt: 12, 293 }, 294 { 295 blobLen: 5000, 296 chunkSize: 40, 297 keyCnt: 6, 298 }, 299 { 300 blobLen: 1, 301 chunkSize: 40, 302 keyCnt: 6, 303 }, 304 { 305 blobLen: 50_000_000, 306 chunkSize: 4000, 307 keyCnt: 1, 308 }, 309 { 310 blobLen: 10_000, 311 chunkSize: 80, 312 keyCnt: 6, 313 }, 314 } 315 316 ns := NewTestNodeStore() 317 for _, tt := range tests { 318 t.Run(fmt.Sprintf("inputSize=%d; chunkSize=%d; keyCnt=%d", tt.blobLen, tt.chunkSize, tt.keyCnt), func(t *testing.T) { 319 r := newTree(t, ns, tt.keyCnt, tt.blobLen, tt.chunkSize) 320 var cnt int 321 walkOpaqueNodes(context.Background(), r, ns, func(ctx context.Context, n Node) error { 322 cnt++ 323 return nil 324 }) 325 require.Equal(t, blobAddrCnt(tt.blobLen, tt.chunkSize)*tt.keyCnt+1, cnt) 326 }) 327 } 328 } 329 330 func blobAddrCnt(size, chunk int) int { 331 if size == 0 { 332 return 0 333 } 334 if size <= chunk { 335 return 1 336 } 337 size = int(math.Ceil(float64(size) / float64(chunk))) 338 l := 1 339 sum := size 340 intChunk := int(math.Ceil(float64(chunk) / float64(hash.ByteLen))) 341 for size > intChunk { 342 size = int(math.Ceil(float64(size) / float64(intChunk))) 343 sum += size 344 l += 1 345 } 346 return sum + 1 347 } 348 349 func newTree(t *testing.T, ns NodeStore, keyCnt, blobLen, chunkSize int) Node { 350 ctx := context.Background() 351 352 keyDesc := val.NewTupleDescriptor(val.Type{Enc: val.Uint32Enc}) 353 valDesc := val.NewTupleDescriptor(val.Type{Enc: val.BytesAddrEnc}) 354 355 tuples := make([][2]val.Tuple, keyCnt) 356 keyBld := val.NewTupleBuilder(keyDesc) 357 valBld := val.NewTupleBuilder(valDesc) 358 for i := range tuples { 359 keyBld.PutUint32(0, uint32(i)) 360 tuples[i][0] = keyBld.Build(sharedPool) 361 362 addr := mustNewBlob(ctx, ns, blobLen, chunkSize) 363 valBld.PutBytesAddr(0, addr) 364 tuples[i][1] = valBld.Build(sharedPool) 365 } 366 367 s := message.NewProllyMapSerializer(valDesc, ns.Pool()) 368 chunker, err := newEmptyChunker(ctx, ns, s) 369 require.NoError(t, err) 370 for _, pair := range tuples { 371 err := chunker.AddPair(ctx, Item(pair[0]), Item(pair[1])) 372 require.NoError(t, err) 373 } 374 root, err := chunker.Done(ctx) 375 require.NoError(t, err) 376 return root 377 } 378 379 func mustNewBlob(ctx context.Context, ns NodeStore, len, chunkSize int) hash.Hash { 380 buf := make([]byte, len) 381 for i := range buf { 382 buf[i] = byte(i) 383 } 384 r := bytes.NewReader(buf) 385 b, err := NewBlobBuilder(chunkSize) 386 if err != nil { 387 panic(err) 388 } 389 b.SetNodeStore(ns) 390 b.Init(len) 391 _, addr, err := b.Chunk(ctx, r) 392 if err != nil { 393 panic(err) 394 } 395 return addr 396 } 397 398 func getBlobValues(msg serial.Message) []byte { 399 var b serial.Blob 400 err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz) 401 if err != nil { 402 panic(err) 403 } 404 return b.PayloadBytes() 405 }