github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/table_test.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "encoding/binary" 27 "fmt" 28 "math/rand" 29 "sort" 30 "testing" 31 32 "github.com/stretchr/testify/assert" 33 "github.com/stretchr/testify/require" 34 "golang.org/x/sync/errgroup" 35 36 "github.com/dolthub/dolt/go/store/chunks" 37 "github.com/dolthub/dolt/go/store/hash" 38 ) 39 40 func buildTable(chunks [][]byte) ([]byte, hash.Hash, error) { 41 totalData := uint64(0) 42 for _, chunk := range chunks { 43 totalData += uint64(len(chunk)) 44 } 45 capacity := maxTableSize(uint64(len(chunks)), totalData) 46 47 buff := make([]byte, capacity) 48 49 tw := newTableWriter(buff, nil) 50 51 for _, chunk := range chunks { 52 tw.addChunk(computeAddr(chunk), chunk) 53 } 54 55 length, blockHash, err := tw.finish() 56 57 if err != nil { 58 return nil, hash.Hash{}, err 59 } 60 61 return buff[:length], blockHash, nil 62 } 63 64 func mustGetString(assert *assert.Assertions, ctx context.Context, tr tableReader, data []byte) string { 65 bytes, err := tr.get(ctx, computeAddr(data), &Stats{}) 66 assert.NoError(err) 67 return string(bytes) 68 } 69 70 func TestSimple(t *testing.T) { 71 ctx := context.Background() 72 assert := assert.New(t) 73 74 chunks := [][]byte{ 75 []byte("hello2"), 76 []byte("goodbye2"), 77 []byte("badbye2"), 78 } 79 80 tableData, _, err := buildTable(chunks) 81 require.NoError(t, err) 82 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 83 require.NoError(t, err) 84 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 85 require.NoError(t, err) 86 defer tr.close() 87 88 assertChunksInReader(chunks, tr, assert) 89 90 assert.Equal(string(chunks[0]), mustGetString(assert, context.Background(), tr, chunks[0])) 91 assert.Equal(string(chunks[1]), mustGetString(assert, context.Background(), tr, chunks[1])) 92 assert.Equal(string(chunks[2]), mustGetString(assert, context.Background(), tr, chunks[2])) 93 94 notPresent := [][]byte{ 95 []byte("yo"), 96 []byte("do"), 97 []byte("so much to do"), 98 } 99 100 assertChunksNotInReader(notPresent, tr, assert) 101 102 assert.NotEqual(string(notPresent[0]), mustGetString(assert, context.Background(), tr, notPresent[0])) 103 assert.NotEqual(string(notPresent[1]), mustGetString(assert, context.Background(), tr, notPresent[1])) 104 assert.NotEqual(string(notPresent[2]), mustGetString(assert, context.Background(), tr, notPresent[2])) 105 } 106 107 func assertChunksInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) { 108 for _, c := range chunks { 109 assert.True(r.has(computeAddr(c))) 110 } 111 } 112 113 func assertChunksNotInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) { 114 for _, c := range chunks { 115 assert.False(r.has(computeAddr(c))) 116 } 117 } 118 119 func TestHasMany(t *testing.T) { 120 ctx := context.Background() 121 assert := assert.New(t) 122 123 chunks := [][]byte{ 124 []byte("hello2"), 125 []byte("goodbye2"), 126 []byte("badbye2"), 127 } 128 129 tableData, _, err := buildTable(chunks) 130 require.NoError(t, err) 131 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 132 require.NoError(t, err) 133 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 134 require.NoError(t, err) 135 defer tr.close() 136 137 addrs := hash.HashSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 138 hasAddrs := []hasRecord{ 139 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:hash.PrefixLen]), 0, false}, 140 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:hash.PrefixLen]), 1, false}, 141 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:hash.PrefixLen]), 2, false}, 142 } 143 sort.Sort(hasRecordByPrefix(hasAddrs)) 144 145 _, err = tr.hasMany(hasAddrs) 146 require.NoError(t, err) 147 for _, ha := range hasAddrs { 148 assert.True(ha.has, "Nothing for prefix %d", ha.prefix) 149 } 150 } 151 152 func TestHasManySequentialPrefix(t *testing.T) { 153 ctx := context.Background() 154 assert := assert.New(t) 155 156 // Use bogus addrs so we can generate the case of sequentially non-unique prefixes in the index 157 // Note that these are already sorted 158 addrStrings := []string{ 159 "0rfgadopg6h3fk7d253ivbjsij4qo3nv", 160 "0rfgadopg6h3fk7d253ivbjsij4qo4nv", 161 "0rfgadopg6h3fk7d253ivbjsij4qo9nv", 162 } 163 164 addrs := make([]hash.Hash, len(addrStrings)) 165 for i, s := range addrStrings { 166 addrs[i] = hash.Parse(s) 167 } 168 169 bogusData := []byte("bogus") // doesn't matter what this is. hasMany() won't check chunkRecords 170 totalData := uint64(len(bogusData) * len(addrs)) 171 172 capacity := maxTableSize(uint64(len(addrs)), totalData) 173 buff := make([]byte, capacity) 174 tw := newTableWriter(buff, nil) 175 176 for _, a := range addrs { 177 tw.addChunk(a, bogusData) 178 } 179 180 length, _, err := tw.finish() 181 require.NoError(t, err) 182 buff = buff[:length] 183 184 ti, err := parseTableIndexByCopy(ctx, buff, &UnlimitedQuotaProvider{}) 185 require.NoError(t, err) 186 tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) 187 require.NoError(t, err) 188 defer tr.close() 189 190 hasAddrs := make([]hasRecord, 2) 191 // Leave out the first address 192 hasAddrs[0] = hasRecord{&addrs[1], addrs[1].Prefix(), 1, false} 193 hasAddrs[1] = hasRecord{&addrs[2], addrs[2].Prefix(), 2, false} 194 195 _, err = tr.hasMany(hasAddrs) 196 require.NoError(t, err) 197 198 for _, ha := range hasAddrs { 199 assert.True(ha.has, fmt.Sprintf("Nothing for prefix %x\n", ha.prefix)) 200 } 201 } 202 203 func BenchmarkHasMany(b *testing.B) { 204 const cnt = 64 * 1024 205 chnks := make([][]byte, cnt) 206 addrs := make(hash.HashSlice, cnt) 207 hrecs := make([]hasRecord, cnt) 208 sparse := make([]hasRecord, cnt/1024) 209 210 data := make([]byte, cnt*16) 211 rand.Read(data) 212 for i := range chnks { 213 chnks[i] = data[i*16 : (i+1)*16] 214 } 215 for i := range addrs { 216 addrs[i] = computeAddr(chnks[i]) 217 } 218 for i := range hrecs { 219 hrecs[i] = hasRecord{ 220 a: &addrs[i], 221 prefix: addrs[i].Prefix(), 222 order: i, 223 } 224 } 225 for i := range sparse { 226 j := i * 64 227 hrecs[i] = hasRecord{ 228 a: &addrs[j], 229 prefix: addrs[j].Prefix(), 230 order: j, 231 } 232 } 233 sort.Sort(hasRecordByPrefix(hrecs)) 234 sort.Sort(hasRecordByPrefix(sparse)) 235 236 ctx := context.Background() 237 tableData, _, err := buildTable(chnks) 238 require.NoError(b, err) 239 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 240 require.NoError(b, err) 241 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 242 require.NoError(b, err) 243 defer tr.close() 244 245 b.ResetTimer() 246 b.Run("dense has many", func(b *testing.B) { 247 var ok bool 248 for i := 0; i < b.N; i++ { 249 ok, err = tr.hasMany(hrecs) 250 } 251 assert.False(b, ok) 252 assert.NoError(b, err) 253 }) 254 b.Run("sparse has many", func(b *testing.B) { 255 var ok bool 256 for i := 0; i < b.N; i++ { 257 ok, err = tr.hasMany(sparse) 258 } 259 assert.True(b, ok) 260 assert.NoError(b, err) 261 }) 262 } 263 264 func TestGetMany(t *testing.T) { 265 ctx := context.Background() 266 assert := assert.New(t) 267 268 data := [][]byte{ 269 []byte("hello2"), 270 []byte("goodbye2"), 271 []byte("badbye2"), 272 } 273 274 tableData, _, err := buildTable(data) 275 require.NoError(t, err) 276 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 277 require.NoError(t, err) 278 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 279 require.NoError(t, err) 280 defer tr.close() 281 282 addrs := hash.HashSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])} 283 getBatch := []getRecord{ 284 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:hash.PrefixLen]), false}, 285 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:hash.PrefixLen]), false}, 286 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:hash.PrefixLen]), false}, 287 } 288 sort.Sort(getRecordByPrefix(getBatch)) 289 290 eg, ctx := errgroup.WithContext(context.Background()) 291 292 got := make([]*chunks.Chunk, 0) 293 _, err = tr.getMany(ctx, eg, getBatch, func(ctx context.Context, c *chunks.Chunk) { got = append(got, c) }, &Stats{}) 294 require.NoError(t, err) 295 require.NoError(t, eg.Wait()) 296 297 assert.True(len(got) == len(getBatch)) 298 } 299 300 func TestCalcReads(t *testing.T) { 301 ctx := context.Background() 302 assert := assert.New(t) 303 304 chunks := [][]byte{ 305 []byte("hello2"), 306 []byte("goodbye2"), 307 []byte("badbye2"), 308 } 309 310 tableData, _, err := buildTable(chunks) 311 require.NoError(t, err) 312 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 313 require.NoError(t, err) 314 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), 0) 315 require.NoError(t, err) 316 defer tr.close() 317 addrs := hash.HashSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 318 getBatch := []getRecord{ 319 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:hash.PrefixLen]), false}, 320 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:hash.PrefixLen]), false}, 321 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:hash.PrefixLen]), false}, 322 } 323 324 gb2 := []getRecord{getBatch[0], getBatch[2]} 325 sort.Sort(getRecordByPrefix(getBatch)) 326 327 reads, remaining, err := tr.calcReads(getBatch, 0) 328 require.NoError(t, err) 329 assert.False(remaining) 330 assert.Equal(1, reads) 331 332 sort.Sort(getRecordByPrefix(gb2)) 333 reads, remaining, err = tr.calcReads(gb2, 0) 334 require.NoError(t, err) 335 assert.False(remaining) 336 assert.Equal(2, reads) 337 } 338 339 func TestExtract(t *testing.T) { 340 ctx := context.Background() 341 assert := assert.New(t) 342 343 chunks := [][]byte{ 344 []byte("hello2"), 345 []byte("goodbye2"), 346 []byte("badbye2"), 347 } 348 349 tableData, _, err := buildTable(chunks) 350 require.NoError(t, err) 351 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 352 require.NoError(t, err) 353 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 354 require.NoError(t, err) 355 defer tr.close() 356 357 addrs := hash.HashSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 358 359 chunkChan := make(chan extractRecord) 360 go func() { 361 err := tr.extract(context.Background(), chunkChan) 362 require.NoError(t, err) 363 close(chunkChan) 364 }() 365 366 i := 0 367 for rec := range chunkChan { 368 assert.NotNil(rec.data, "Nothing for", addrs[i]) 369 assert.Equal(addrs[i], rec.a) 370 assert.Equal(chunks[i], rec.data) 371 i++ 372 } 373 } 374 375 func Test65k(t *testing.T) { 376 ctx := context.Background() 377 assert := assert.New(t) 378 379 count := 1 << 16 380 chunks := make([][]byte, count) 381 382 dataFn := func(i int) []byte { 383 return []byte(fmt.Sprintf("data%d", i*2)) 384 } 385 386 for i := 0; i < count; i++ { 387 chunks[i] = dataFn(i) 388 } 389 390 tableData, _, err := buildTable(chunks) 391 require.NoError(t, err) 392 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 393 require.NoError(t, err) 394 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 395 require.NoError(t, err) 396 defer tr.close() 397 398 for i := 0; i < count; i++ { 399 data := dataFn(i) 400 h := computeAddr(data) 401 assert.True(tr.has(computeAddr(data))) 402 bytes, err := tr.get(context.Background(), h, &Stats{}) 403 require.NoError(t, err) 404 assert.Equal(string(data), string(bytes)) 405 } 406 407 for i := count; i < count*2; i++ { 408 data := dataFn(i) 409 h := computeAddr(data) 410 assert.False(tr.has(computeAddr(data))) 411 bytes, err := tr.get(context.Background(), h, &Stats{}) 412 require.NoError(t, err) 413 assert.NotEqual(string(data), string(bytes)) 414 } 415 } 416 417 // Ensure all addresses share the first 7 bytes. Useful for easily generating tests which have 418 // "prefix" collisions. 419 func computeAddrCommonPrefix(data []byte) hash.Hash { 420 a := computeHashDefault(data) 421 a[0] = 0x01 422 a[1] = 0x23 423 a[2] = 0x45 424 a[3] = 0x67 425 a[4] = 0x89 426 a[5] = 0xab 427 a[6] = 0xcd 428 return a 429 } 430 431 func doTestNGetMany(t *testing.T, count int) { 432 ctx := context.Background() 433 assert := assert.New(t) 434 435 data := make([][]byte, count) 436 437 dataFn := func(i int) []byte { 438 return []byte(fmt.Sprintf("data%d", i*2)) 439 } 440 441 for i := 0; i < count; i++ { 442 data[i] = dataFn(i) 443 } 444 445 tableData, _, err := buildTable(data) 446 require.NoError(t, err) 447 ti, err := parseTableIndexByCopy(ctx, tableData, &UnlimitedQuotaProvider{}) 448 require.NoError(t, err) 449 tr, err := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 450 require.NoError(t, err) 451 defer tr.close() 452 453 getBatch := make([]getRecord, len(data)) 454 for i := 0; i < count; i++ { 455 a := computeAddr(dataFn(i)) 456 getBatch[i] = getRecord{&a, a.Prefix(), false} 457 } 458 459 sort.Sort(getRecordByPrefix(getBatch)) 460 461 eg, ctx := errgroup.WithContext(context.Background()) 462 463 got := make([]*chunks.Chunk, 0) 464 _, err = tr.getMany(ctx, eg, getBatch, func(ctx context.Context, c *chunks.Chunk) { got = append(got, c) }, &Stats{}) 465 require.NoError(t, err) 466 require.NoError(t, eg.Wait()) 467 468 assert.True(len(got) == len(getBatch)) 469 } 470 471 func Test65kGetMany(t *testing.T) { 472 doTestNGetMany(t, 1<<16) 473 } 474 475 func Test2kGetManyCommonPrefix(t *testing.T) { 476 computeAddr = computeAddrCommonPrefix 477 defer func() { 478 computeAddr = computeHashDefault 479 }() 480 481 doTestNGetMany(t, 1<<11) 482 } 483 484 func TestEmpty(t *testing.T) { 485 assert := assert.New(t) 486 487 buff := make([]byte, footerSize) 488 tw := newTableWriter(buff, nil) 489 length, _, err := tw.finish() 490 require.NoError(t, err) 491 assert.True(length == footerSize) 492 }