github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowcontainer/numbered_row_container_test.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowcontainer

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"sort"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/diskmap"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/stretchr/testify/require"
)

// Tests the de-duping functionality of DiskBackedNumberedRowContainer.
func TestNumberedRowContainerDeDuping(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
	if err != nil {
		t.Fatal(err)
	}
	defer tempEngine.Close()

	numRows := 20
	const numCols = 2
	const smallMemoryBudget = 40
	rng, _ := randutil.NewPseudoRand()

	memoryMonitor := mon.MakeMonitor(
		"test-mem",
		mon.MemoryResource,
		nil,           /* curCount */
		nil,           /* maxHist */
		-1,            /* increment */
		math.MaxInt64, /* noteworthy */
		st,
	)
	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
	defer diskMonitor.Stop(ctx)

	memoryBudget := math.MaxInt64
	if rng.Intn(2) == 0 {
		fmt.Printf("using smallMemoryBudget to spill to disk\n")
		memoryBudget = smallMemoryBudget
	}
	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(int64(memoryBudget)))
	defer memoryMonitor.Stop(ctx)

	// Use random types and random rows.
	types := sqlbase.RandSortingTypes(rng, numCols)
	ordering := sqlbase.ColumnOrdering{
		sqlbase.ColumnOrderInfo{
			ColIdx:    0,
			Direction: encoding.Ascending,
		},
		sqlbase.ColumnOrderInfo{
			ColIdx:    1,
			Direction: encoding.Descending,
		},
	}
	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)
	rc := NewDiskBackedNumberedRowContainer(
		true /*deDup*/, types, &evalCtx, tempEngine, &memoryMonitor, diskMonitor,
		0 /*rowCapacity*/)
	defer rc.Close(ctx)

	// Each pass does an UnsafeReset at the end.
	for passWithReset := 0; passWithReset < 2; passWithReset++ {
		// Insert rows.
		for insertPass := 0; insertPass < 2; insertPass++ {
			for i := 0; i < numRows; i++ {
				idx, err := rc.AddRow(ctx, rows[i])
				require.NoError(t, err)
				require.Equal(t, i, idx)
			}
		}
		// Random access of the inserted rows.
		var accesses []int
		for i := 0; i < 2*numRows; i++ {
			accesses = append(accesses, rng.Intn(numRows))
		}
		rc.SetupForRead(ctx, [][]int{accesses})
		for i := 0; i < len(accesses); i++ {
			skip := rng.Intn(10) == 0
			row, err := rc.GetRow(ctx, accesses[i], skip)
			require.NoError(t, err)
			if skip {
				continue
			}
			require.Equal(t, rows[accesses[i]].String(types), row.String(types))
		}
		// Reset and reorder the rows for the next pass.
		rand.Shuffle(numRows, func(i, j int) {
			rows[i], rows[j] = rows[j], rows[i]
		})
		require.NoError(t, rc.UnsafeReset(ctx))
	}
}

// Tests the iterator and iterator caching of DiskBackedNumberedRowContainer.
// Does not utilize the de-duping functionality since that is tested
// elsewhere.
func TestNumberedRowContainerIteratorCaching(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
	if err != nil {
		t.Fatal(err)
	}
	defer tempEngine.Close()

	memoryMonitor := mon.MakeMonitor(
		"test-mem",
		mon.MemoryResource,
		nil,           /* curCount */
		nil,           /* maxHist */
		-1,            /* increment */
		math.MaxInt64, /* noteworthy */
		st,
	)
	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
	defer diskMonitor.Stop(ctx)

	numRows := 200
	const numCols = 2
	// This memory budget allows for some caching, but typically cannot
	// cache all the rows.
	const memoryBudget = 12000
	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(memoryBudget))
	defer memoryMonitor.Stop(ctx)

	// Use random types and random rows.
	rng, _ := randutil.NewPseudoRand()

	types := sqlbase.RandSortingTypes(rng, numCols)
	ordering := sqlbase.ColumnOrdering{
		sqlbase.ColumnOrderInfo{
			ColIdx:    0,
			Direction: encoding.Ascending,
		},
		sqlbase.ColumnOrderInfo{
			ColIdx:    1,
			Direction: encoding.Descending,
		},
	}
	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)
	rc := NewDiskBackedNumberedRowContainer(
		false /*deDup*/, types, &evalCtx, tempEngine, &memoryMonitor, diskMonitor,
		0 /*rowCapacity*/)
	defer rc.Close(ctx)

	// Each pass does an UnsafeReset at the end.
	for passWithReset := 0; passWithReset < 2; passWithReset++ {
		// Insert rows.
		for i := 0; i < numRows; i++ {
			idx, err := rc.AddRow(ctx, rows[i])
			require.NoError(t, err)
			require.Equal(t, i, idx)
		}
		// We want all the memory to be usable by the cache, so spill to disk.
		require.NoError(t, rc.testingSpillToDisk(ctx))
		require.True(t, rc.UsingDisk())
		// Random access of the inserted rows.
		var accesses [][]int
		for i := 0; i < 2*numRows; i++ {
			var access []int
			for j := 0; j < 4; j++ {
				access = append(access, rng.Intn(numRows))
			}
			accesses = append(accesses, access)
		}
		rc.SetupForRead(ctx, accesses)
		for _, access := range accesses {
			for _, index := range access {
				skip := rng.Intn(10) == 0
				row, err := rc.GetRow(ctx, index, skip)
				require.NoError(t, err)
				if skip {
					continue
				}
				require.Equal(t, rows[index].String(types), row.String(types))
			}
		}
		fmt.Printf("hits: %d, misses: %d, maxCacheSize: %d\n",
			rc.rowIter.hitCount, rc.rowIter.missCount, rc.rowIter.maxCacheSize)
		// Reset and reorder the rows for the next pass.
		rand.Shuffle(numRows, func(i, j int) {
			rows[i], rows[j] = rows[j], rows[i]
		})
		require.NoError(t, rc.UnsafeReset(ctx))
	}
}

// Tests that the DiskBackedNumberedRowContainer and
// DiskBackedIndexedRowContainer return the same results.
func TestCompareNumberedAndIndexedRowContainers(t *testing.T) {
	defer leaktest.AfterTest(t)()

	rng, _ := randutil.NewPseudoRand()

	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
	if err != nil {
		t.Fatal(err)
	}
	defer tempEngine.Close()

	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
	defer diskMonitor.Stop(ctx)

	numRows := 200
	const numCols = 2
	// This memory budget allows for some caching, but typically cannot
	// cache all the rows.
	var memoryBudget int64 = 12000
	if rng.Intn(2) == 0 {
		memoryBudget = math.MaxInt64
	}

	// Use random types and random rows.
	types := sqlbase.RandSortingTypes(rng, numCols)
	ordering := sqlbase.ColumnOrdering{
		sqlbase.ColumnOrderInfo{
			ColIdx:    0,
			Direction: encoding.Ascending,
		},
		sqlbase.ColumnOrderInfo{
			ColIdx:    1,
			Direction: encoding.Descending,
		},
	}
	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)

	var containers [2]numberedContainer
	containers[0] = makeNumberedContainerUsingIRC(
		ctx, t, types, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
	containers[1] = makeNumberedContainerUsingNRC(
		ctx, t, types, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
	defer func() {
		for _, rc := range containers {
			rc.close(ctx)
		}
	}()

	// Each pass does an UnsafeReset at the end.
	for passWithReset := 0; passWithReset < 2; passWithReset++ {
		// Insert rows.
		for i := 0; i < numRows; i++ {
			for _, rc := range containers {
				err := rc.addRow(ctx, rows[i])
				require.NoError(t, err)
			}
		}
		// We want all the memory to be usable by the cache, so spill to disk.
		if memoryBudget != math.MaxInt64 {
			for _, rc := range containers {
				require.NoError(t, rc.spillToDisk(ctx))
			}
		}

		// Random access of the inserted rows.
		var accesses [][]int
		for i := 0; i < 2*numRows; i++ {
			var access []int
			for j := 0; j < 4; j++ {
				access = append(access, rng.Intn(numRows))
			}
			accesses = append(accesses, access)
		}
		for _, rc := range containers {
			rc.setupForRead(ctx, accesses)
		}
		for _, access := range accesses {
			for _, index := range access {
				skip := rng.Intn(10) == 0
				var rows [2]sqlbase.EncDatumRow
				for i, rc := range containers {
					row, err := rc.getRow(ctx, index, skip)
					require.NoError(t, err)
					rows[i] = row
				}
				if skip {
					continue
				}
				require.Equal(t, rows[0].String(types), rows[1].String(types))
			}
		}
		// Reset and reorder the rows for the next pass.
		rand.Shuffle(numRows, func(i, j int) {
			rows[i], rows[j] = rows[j], rows[i]
		})
		for _, rc := range containers {
			require.NoError(t, rc.unsafeReset(ctx))
		}
	}
}

// Adapter interface that can be implemented using both DiskBackedNumberedRowContainer
// and DiskBackedIndexedRowContainer.
type numberedContainer interface {
	addRow(context.Context, sqlbase.EncDatumRow) error
	setupForRead(ctx context.Context, accesses [][]int)
	getRow(ctx context.Context, idx int, skip bool) (sqlbase.EncDatumRow, error)
	spillToDisk(context.Context) error
	unsafeReset(context.Context) error
	close(context.Context)
}

type numberedContainerUsingNRC struct {
	rc            *DiskBackedNumberedRowContainer
	memoryMonitor *mon.BytesMonitor
}

func (d numberedContainerUsingNRC) addRow(ctx context.Context, row sqlbase.EncDatumRow) error {
	_, err := d.rc.AddRow(ctx, row)
	return err
}
func (d numberedContainerUsingNRC) setupForRead(ctx context.Context, accesses [][]int) {
	d.rc.SetupForRead(ctx, accesses)
}
func (d numberedContainerUsingNRC) getRow(
	ctx context.Context, idx int, skip bool,
) (sqlbase.EncDatumRow, error) {
	return d.rc.GetRow(ctx, idx, false)
}
func (d numberedContainerUsingNRC) spillToDisk(ctx context.Context) error {
	return d.rc.testingSpillToDisk(ctx)
}
func (d numberedContainerUsingNRC) unsafeReset(ctx context.Context) error {
	return d.rc.UnsafeReset(ctx)
}
func (d numberedContainerUsingNRC) close(ctx context.Context) {
	d.rc.Close(ctx)
	d.memoryMonitor.Stop(ctx)
}
func makeNumberedContainerUsingNRC(
	ctx context.Context,
	t testing.TB,
	types []*types.T,
	evalCtx *tree.EvalContext,
	engine diskmap.Factory,
	st *cluster.Settings,
	memoryBudget int64,
	diskMonitor *mon.BytesMonitor,
) numberedContainerUsingNRC {
	memoryMonitor := makeMemMonitorAndStart(ctx, st, memoryBudget)
	rc := NewDiskBackedNumberedRowContainer(
		false /* deDup */, types, evalCtx, engine, memoryMonitor, diskMonitor, 0 /* rowCapacity */)
	require.NoError(t, rc.testingSpillToDisk(ctx))
	return numberedContainerUsingNRC{rc: rc, memoryMonitor: memoryMonitor}
}

type numberedContainerUsingIRC struct {
	rc            *DiskBackedIndexedRowContainer
	memoryMonitor *mon.BytesMonitor
}

func (d numberedContainerUsingIRC) addRow(ctx context.Context, row sqlbase.EncDatumRow) error {
	return d.rc.AddRow(ctx, row)
}
func (d numberedContainerUsingIRC) setupForRead(context.Context, [][]int) {}
func (d numberedContainerUsingIRC) getRow(
	ctx context.Context, idx int, skip bool,
) (sqlbase.EncDatumRow, error) {
	if skip {
		return nil, nil
	}
	row, err := d.rc.GetRow(ctx, idx)
	if err != nil {
		return nil, err
	}
	return row.(IndexedRow).Row, nil
}
func (d numberedContainerUsingIRC) spillToDisk(ctx context.Context) error {
	if d.rc.UsingDisk() {
		return nil
	}
	return d.rc.SpillToDisk(ctx)
}
func (d numberedContainerUsingIRC) unsafeReset(ctx context.Context) error {
	return d.rc.UnsafeReset(ctx)
}
func (d numberedContainerUsingIRC) close(ctx context.Context) {
	d.rc.Close(ctx)
	d.memoryMonitor.Stop(ctx)
}
func makeNumberedContainerUsingIRC(
	ctx context.Context,
	t require.TestingT,
	types []*types.T,
	evalCtx *tree.EvalContext,
	engine diskmap.Factory,
	st *cluster.Settings,
	memoryBudget int64,
	diskMonitor *mon.BytesMonitor,
) numberedContainerUsingIRC {
	memoryMonitor := makeMemMonitorAndStart(ctx, st, memoryBudget)
	rc := NewDiskBackedIndexedRowContainer(
		nil /* ordering */, types, evalCtx, engine, memoryMonitor, diskMonitor, 0 /* rowCapacity */)
	require.NoError(t, rc.SpillToDisk(ctx))
	return numberedContainerUsingIRC{rc: rc, memoryMonitor: memoryMonitor}
}

func makeMemMonitorAndStart(
	ctx context.Context, st *cluster.Settings, budget int64,
) *mon.BytesMonitor {
	memoryMonitor := mon.MakeMonitor(
		"test-mem",
		mon.MemoryResource,
		nil,           /* curCount */
		nil,           /* maxHist */
		-1,            /* increment */
		math.MaxInt64, /* noteworthy */
		st,
	)
	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(budget))
	return &memoryMonitor
}

// Assume that join is using a batch of 100 left rows.
const leftRowsBatch = 100

// generateLookupJoinAccessPattern generates an access pattern resembling a
// lookup join: rightRowsReadPerLeftRow is the number of right rows read by
// each left row, and repeatAccesses is the number of times on average that
// each right row is accessed.
func generateLookupJoinAccessPattern(
	rng *rand.Rand, rightRowsReadPerLeftRow int, repeatAccesses int,
) [][]int {
	// Unique rows accessed.
	numRowsAccessed := (leftRowsBatch * rightRowsReadPerLeftRow) / repeatAccesses
	out := make([][]int, leftRowsBatch)
	for i := 0; i < len(out); i++ {
		// Each left row sees a contiguous sequence of rows on the right since the
		// rows are being retrieved and stored in the container in index order.
		start := rng.Intn(numRowsAccessed - rightRowsReadPerLeftRow)
		out[i] = make([]int, rightRowsReadPerLeftRow)
		for j := start; j < start+rightRowsReadPerLeftRow; j++ {
			out[i][j-start] = j
		}
	}
	return out
}

// generateInvertedJoinAccessPattern generates an access pattern resembling an
// inverted join. numRightRows is the number of rows in the container, of which
// a certain fraction of rows are accessed randomly (when using an inverted
// index for intersection the result set can be sparse). repeatAccesses is the
// number of times on average that each right row is accessed.
func generateInvertedJoinAccessPattern(
	b *testing.B, rng *rand.Rand, numRightRows int, rightRowsReadPerLeftRow int, repeatAccesses int,
) [][]int {
	// Unique rows accessed.
	numRowsAccessed := (leftRowsBatch * rightRowsReadPerLeftRow) / repeatAccesses
	// Don't want each left row to access most of the right rows.
	require.True(b, rightRowsReadPerLeftRow < numRowsAccessed/2)
	accessedIndexes := make(map[int]struct{})
	for len(accessedIndexes) < numRowsAccessed {
		accessedIndexes[rng.Intn(numRightRows)] = struct{}{}
	}
	accessedRightRows := make([]int, 0, numRowsAccessed)
	for k := range accessedIndexes {
		accessedRightRows = append(accessedRightRows, k)
	}
	out := make([][]int, leftRowsBatch)
	for i := 0; i < len(out); i++ {
		out[i] = make([]int, 0, rightRowsReadPerLeftRow)
		uniqueRows := make(map[int]struct{})
		for len(uniqueRows) < rightRowsReadPerLeftRow {
			idx := rng.Intn(len(accessedRightRows))
			if _, notUnique := uniqueRows[idx]; notUnique {
				continue
			}
			uniqueRows[idx] = struct{}{}
			out[i] = append(out[i], accessedRightRows[idx])
		}
		// Sort since accesses by a left row are in ascending order.
		sort.Slice(out[i], func(a, b int) bool {
			return out[i][a] < out[i][b]
		})
	}
	return out
}

func accessPatternForBenchmarkIterations(totalAccesses int, accessPattern [][]int) [][]int {
	var out [][]int
	var i, j int
	for count := 0; count < totalAccesses; {
		if i >= len(accessPattern) {
			i = 0
			continue
		}
		if j >= len(accessPattern[i]) {
			j = 0
			i++
			continue
		}
		if j == 0 {
			out = append(out, []int(nil))
		}
		last := len(out) - 1
		out[last] = append(out[last], accessPattern[i][j])
		count++
		j++
	}
	return out
}

func BenchmarkNumberedContainerIteratorCaching(b *testing.B) {
	const numRows = 10000

	ctx := context.Background()
	st := cluster.MakeTestingClusterSettings()
	evalCtx := tree.MakeTestingEvalContext(st)
	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.TempStorageConfig{InMemory: true}, base.DefaultTestStoreSpec)
	if err != nil {
		b.Fatal(err)
	}
	defer tempEngine.Close()

	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
	defer diskMonitor.Stop(ctx)

	// Each row is 10 string columns. Each string has a mean length of 5, and the
	// row encoded into bytes is ~64 bytes. So we approximate ~512 rows per ssblock.
	// The in-memory decoded footprint in the cache is ~780 bytes.
	var typs []*types.T
	for i := 0; i < 10; i++ {
		typs = append(typs, types.String)
	}
	rng, _ := randutil.NewPseudoRand()
	rows := make([]sqlbase.EncDatumRow, numRows)
	for i := 0; i < numRows; i++ {
		rows[i] = make([]sqlbase.EncDatum, len(typs))
		for j := range typs {
			rows[i][j] = sqlbase.DatumToEncDatum(typs[j], sqlbase.RandDatum(rng, typs[j], false))
		}
	}

	type accessPattern struct {
		joinType string
		paramStr string
		accesses [][]int
	}
	var accessPatterns []accessPattern
	// Lookup join access patterns. The highest number of unique rows accessed is
	// when rightRowsReadPerLeftRow = 64 and repeatAccesses = 1, which with a left
	// batch of 100 is 100 * 64 / 1 = 6400 rows accessed. The container has
	// 10000 rows. If N unique rows are accessed these form a prefix of the rows
	// in the container.
	for _, rightRowsReadPerLeftRow := range []int{1, 2, 4, 8, 16, 32, 64} {
		for _, repeatAccesses := range []int{1, 2} {
			accessPatterns = append(accessPatterns, accessPattern{
				joinType: "lookup-join",
				paramStr: fmt.Sprintf("matchRatio=%d/repeatAccesses=%d",
					rightRowsReadPerLeftRow, repeatAccesses),
				accesses: generateLookupJoinAccessPattern(rng, rightRowsReadPerLeftRow, repeatAccesses),
			})
		}
	}
	// Inverted join access patterns.
	// With a left batch of 100 rows, and rightRowsReadPerLeftRow = (25, 50, 100), the
	// total accesses are (2500, 5000, 10000). Consider repeatAccesses = 2: the unique
	// rows accessed are (1250, 2500, 5000), which will be randomly distributed over the
	// 10000 rows.
	for _, rightRowsReadPerLeftRow := range []int{1, 25, 50, 100} {
		for _, repeatAccesses := range []int{1, 2, 4, 8} {
			accessPatterns = append(accessPatterns, accessPattern{
				joinType: "inverted-join",
				paramStr: fmt.Sprintf("matchRatio=%d/repeatAccesses=%d",
					rightRowsReadPerLeftRow, repeatAccesses),
				accesses: generateInvertedJoinAccessPattern(
					b, rng, numRows, rightRowsReadPerLeftRow, repeatAccesses),
			})
		}
	}

	// Observed cache behavior for a particular access pattern for each kind of
	// join, to give some insight into performance.
	// - The inverted join pattern has poor locality and the IndexedRowContainer
	//   does poorly. The NumberedRowContainer is able to use the knowledge that
	//   many rows will never be accessed.
	//                           11000  100KB  500KB  2.5MB
	//   IndexedRowContainer      0.00   0.00   0.00   0.00
	//   NumberedRowContainer     0.22   0.68   0.88   1.00
	// - The lookup join access pattern and observed hit rates. The better
	//   locality improves the behavior of the IndexedRowContainer, but it
	//   is still significantly worse than the NumberedRowContainer.
	//                           11000  100KB  500KB  2.5MB
	//   IndexedRowContainer      0.00   0.00   0.10   0.35
	//   NumberedRowContainer     0.01   0.09   0.28   0.63

	for _, pattern := range accessPatterns {
		// Approx cache capacity in rows with these settings: 13, 132, 666, 3300.
		for _, memoryBudget := range []int64{11000, 100 << 10, 500 << 10, 2500 << 10} {
			for _, containerKind := range []string{"indexed", "numbered"} {
				b.Run(fmt.Sprintf("%s/%s/mem=%d/%s", pattern.joinType, pattern.paramStr, memoryBudget,
					containerKind), func(b *testing.B) {
					var nc numberedContainer
					switch containerKind {
					case "indexed":
						nc = makeNumberedContainerUsingIRC(
							ctx, b, typs, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
					case "numbered":
						nc = makeNumberedContainerUsingNRC(
							ctx, b, typs, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
					}
					defer nc.close(ctx)
					for i := 0; i < len(rows); i++ {
						require.NoError(b, nc.addRow(ctx, rows[i]))
					}
					accesses := accessPatternForBenchmarkIterations(b.N, pattern.accesses)
					b.ResetTimer()
					nc.setupForRead(ctx, accesses)
					for i := 0; i < len(accesses); i++ {
						for j := 0; j < len(accesses[i]); j++ {
							if _, err := nc.getRow(ctx, accesses[i][j], false /* skip */); err != nil {
								b.Fatal(err)
							}
						}
					}
					b.StopTimer()
					// Disabled code block. Change to true to look at hit ratio and cache sizes
					// for these benchmarks.
					if false {
						// Print statements for understanding the performance differences.
						fmt.Printf("\n**%s/%s/%d/%s: iters: %d\n", pattern.joinType, pattern.paramStr, memoryBudget, containerKind, b.N)
						switch rc := nc.(type) {
						case numberedContainerUsingNRC:
							fmt.Printf("hit rate: %.2f, maxCacheSize: %d\n",
								float64(rc.rc.rowIter.hitCount)/float64(rc.rc.rowIter.missCount+rc.rc.rowIter.hitCount),
								rc.rc.rowIter.maxCacheSize)
						case numberedContainerUsingIRC:
							fmt.Printf("hit rate: %.2f, maxCacheSize: %d\n",
								float64(rc.rc.hitCount)/float64(rc.rc.missCount+rc.rc.hitCount),
								rc.rc.maxCacheSize)
						}
					}
				})
			}
		}
	}
}

// TODO(sumeer):
// - Benchmarks:
//   - de-duping with and without spilling.
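
// Example invocations (a sketch, assuming a checkout of the cockroach
// repository at the version named in the file header; all flags are standard
// `go test` flags):
//
//	go test ./pkg/sql/rowcontainer -run 'NumberedRowContainer|CompareNumberedAndIndexedRowContainers' -v
//	go test ./pkg/sql/rowcontainer -bench BenchmarkNumberedContainerIteratorCaching -benchtime=1000x
//
// To see the hit ratios and maximum cache sizes gathered during the benchmark,
// change the `if false {` diagnostics block near the end of
// BenchmarkNumberedContainerIteratorCaching to `if true {`.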