github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowcontainer/numbered_row_container_test.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowcontainer
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"sort"
    19  	"testing"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/base"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/diskmap"
    23  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    28  	"github.com/cockroachdb/cockroach/pkg/storage"
    29  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    30  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    31  	"github.com/cockroachdb/cockroach/pkg/util/mon"
    32  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    33  	"github.com/stretchr/testify/require"
    34  )
    35  
    36  // Tests the de-duping functionality of DiskBackedNumberedRowContainer.
    37  func TestNumberedRowContainerDeDuping(t *testing.T) {
    38  	defer leaktest.AfterTest(t)()
    39  
    40  	ctx := context.Background()
    41  	st := cluster.MakeTestingClusterSettings()
    42  	evalCtx := tree.MakeTestingEvalContext(st)
    43  	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
    44  	if err != nil {
    45  		t.Fatal(err)
    46  	}
    47  	defer tempEngine.Close()
    48  
    49  	numRows := 20
    50  	const numCols = 2
    51  	const smallMemoryBudget = 40
    52  	rng, _ := randutil.NewPseudoRand()
    53  
    54  	memoryMonitor := mon.MakeMonitor(
    55  		"test-mem",
    56  		mon.MemoryResource,
    57  		nil,           /* curCount */
    58  		nil,           /* maxHist */
    59  		-1,            /* increment */
    60  		math.MaxInt64, /* noteworthy */
    61  		st,
    62  	)
    63  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
    64  	defer diskMonitor.Stop(ctx)
    65  
    66  	memoryBudget := math.MaxInt64
    67  	if rng.Intn(2) == 0 {
    68  		fmt.Printf("using smallMemoryBudget to spill to disk\n")
    69  		memoryBudget = smallMemoryBudget
    70  	}
    71  	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(int64(memoryBudget)))
    72  	defer memoryMonitor.Stop(ctx)
    73  
    74  	// Use random types and random rows.
    75  	types := sqlbase.RandSortingTypes(rng, numCols)
    76  	ordering := sqlbase.ColumnOrdering{
    77  		sqlbase.ColumnOrderInfo{
    78  			ColIdx:    0,
    79  			Direction: encoding.Ascending,
    80  		},
    81  		sqlbase.ColumnOrderInfo{
    82  			ColIdx:    1,
    83  			Direction: encoding.Descending,
    84  		},
    85  	}
    86  	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)
    87  	rc := NewDiskBackedNumberedRowContainer(
    88  		true /*deDup*/, types, &evalCtx, tempEngine, &memoryMonitor, diskMonitor,
    89  		0 /*rowCapacity*/)
    90  	defer rc.Close(ctx)
    91  
    92  	// Each pass does an UnsafeReset at the end.
    93  	for passWithReset := 0; passWithReset < 2; passWithReset++ {
    94  		// Insert rows.
    95  		for insertPass := 0; insertPass < 2; insertPass++ {
    96  			for i := 0; i < numRows; i++ {
    97  				idx, err := rc.AddRow(ctx, rows[i])
    98  				require.NoError(t, err)
    99  				require.Equal(t, i, idx)
   100  			}
   101  		}
   102  		// Random access of the inserted rows.
   103  		var accesses []int
   104  		for i := 0; i < 2*numRows; i++ {
   105  			accesses = append(accesses, rng.Intn(numRows))
   106  		}
   107  		rc.SetupForRead(ctx, [][]int{accesses})
   108  		for i := 0; i < len(accesses); i++ {
   109  			skip := rng.Intn(10) == 0
   110  			row, err := rc.GetRow(ctx, accesses[i], skip)
   111  			require.NoError(t, err)
   112  			if skip {
   113  				continue
   114  			}
   115  			require.Equal(t, rows[accesses[i]].String(types), row.String(types))
   116  		}
   117  		// Reset and reorder the rows for the next pass.
   118  		rand.Shuffle(numRows, func(i, j int) {
   119  			rows[i], rows[j] = rows[j], rows[i]
   120  		})
   121  		require.NoError(t, rc.UnsafeReset(ctx))
   122  	}
   123  }
   124  
   125  // Tests the iterator and iterator caching of DiskBackedNumberedRowContainer.
   126  // Does not utilize the de-duping functionality since that is tested
   127  // elsewhere.
   128  func TestNumberedRowContainerIteratorCaching(t *testing.T) {
   129  	defer leaktest.AfterTest(t)()
   130  
   131  	ctx := context.Background()
   132  	st := cluster.MakeTestingClusterSettings()
   133  	evalCtx := tree.MakeTestingEvalContext(st)
   134  	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
   135  	if err != nil {
   136  		t.Fatal(err)
   137  	}
   138  	defer tempEngine.Close()
   139  
   140  	memoryMonitor := mon.MakeMonitor(
   141  		"test-mem",
   142  		mon.MemoryResource,
   143  		nil,           /* curCount */
   144  		nil,           /* maxHist */
   145  		-1,            /* increment */
   146  		math.MaxInt64, /* noteworthy */
   147  		st,
   148  	)
   149  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   150  	defer diskMonitor.Stop(ctx)
   151  
   152  	numRows := 200
   153  	const numCols = 2
   154  	// This memory budget allows for some caching, but typically cannot
   155  	// cache all the rows.
   156  	const memoryBudget = 12000
   157  	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(memoryBudget))
   158  	defer memoryMonitor.Stop(ctx)
   159  
   160  	// Use random types and random rows.
   161  	rng, _ := randutil.NewPseudoRand()
   162  
   163  	types := sqlbase.RandSortingTypes(rng, numCols)
   164  	ordering := sqlbase.ColumnOrdering{
   165  		sqlbase.ColumnOrderInfo{
   166  			ColIdx:    0,
   167  			Direction: encoding.Ascending,
   168  		},
   169  		sqlbase.ColumnOrderInfo{
   170  			ColIdx:    1,
   171  			Direction: encoding.Descending,
   172  		},
   173  	}
   174  	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)
   175  	rc := NewDiskBackedNumberedRowContainer(
   176  		false /*deDup*/, types, &evalCtx, tempEngine, &memoryMonitor, diskMonitor,
   177  		0 /*rowCapacity*/)
   178  	defer rc.Close(ctx)
   179  
   180  	// Each pass does an UnsafeReset at the end.
   181  	for passWithReset := 0; passWithReset < 2; passWithReset++ {
   182  		// Insert rows.
   183  		for i := 0; i < numRows; i++ {
   184  			idx, err := rc.AddRow(ctx, rows[i])
   185  			require.NoError(t, err)
   186  			require.Equal(t, i, idx)
   187  		}
   188  		// We want all the memory to be usable by the cache, so spill to disk.
   189  		require.NoError(t, rc.testingSpillToDisk(ctx))
   190  		require.True(t, rc.UsingDisk())
   191  		// Random access of the inserted rows.
   192  		var accesses [][]int
   193  		for i := 0; i < 2*numRows; i++ {
   194  			var access []int
   195  			for j := 0; j < 4; j++ {
   196  				access = append(access, rng.Intn(numRows))
   197  			}
   198  			accesses = append(accesses, access)
   199  		}
   200  		rc.SetupForRead(ctx, accesses)
   201  		for _, access := range accesses {
   202  			for _, index := range access {
   203  				skip := rng.Intn(10) == 0
   204  				row, err := rc.GetRow(ctx, index, skip)
   205  				require.NoError(t, err)
   206  				if skip {
   207  					continue
   208  				}
   209  				require.Equal(t, rows[index].String(types), row.String(types))
   210  			}
   211  		}
   212  		fmt.Printf("hits: %d, misses: %d, maxCacheSize: %d\n",
   213  			rc.rowIter.hitCount, rc.rowIter.missCount, rc.rowIter.maxCacheSize)
   214  		// Reset and reorder the rows for the next pass.
   215  		rand.Shuffle(numRows, func(i, j int) {
   216  			rows[i], rows[j] = rows[j], rows[i]
   217  		})
   218  		require.NoError(t, rc.UnsafeReset(ctx))
   219  	}
   220  }
   221  
   222  // Tests that the DiskBackedNumberedRowContainer and
   223  // DiskBackedIndexedRowContainer return the same results.
   224  func TestCompareNumberedAndIndexedRowContainers(t *testing.T) {
   225  	defer leaktest.AfterTest(t)()
   226  
   227  	rng, _ := randutil.NewPseudoRand()
   228  
   229  	ctx := context.Background()
   230  	st := cluster.MakeTestingClusterSettings()
   231  	evalCtx := tree.MakeTestingEvalContext(st)
   232  	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
   233  	if err != nil {
   234  		t.Fatal(err)
   235  	}
   236  	defer tempEngine.Close()
   237  
   238  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   239  	defer diskMonitor.Stop(ctx)
   240  
   241  	numRows := 200
   242  	const numCols = 2
   243  	// This memory budget allows for some caching, but typically cannot
   244  	// cache all the rows.
   245  	var memoryBudget int64 = 12000
   246  	if rng.Intn(2) == 0 {
   247  		memoryBudget = math.MaxInt64
   248  	}
   249  
   250  	// Use random types and random rows.
   251  	types := sqlbase.RandSortingTypes(rng, numCols)
   252  	ordering := sqlbase.ColumnOrdering{
   253  		sqlbase.ColumnOrderInfo{
   254  			ColIdx:    0,
   255  			Direction: encoding.Ascending,
   256  		},
   257  		sqlbase.ColumnOrderInfo{
   258  			ColIdx:    1,
   259  			Direction: encoding.Descending,
   260  		},
   261  	}
   262  	numRows, rows := makeUniqueRows(t, &evalCtx, rng, numRows, types, ordering)
   263  
   264  	var containers [2]numberedContainer
   265  	containers[0] = makeNumberedContainerUsingIRC(
   266  		ctx, t, types, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
   267  	containers[1] = makeNumberedContainerUsingNRC(
   268  		ctx, t, types, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
   269  	defer func() {
   270  		for _, rc := range containers {
   271  			rc.close(ctx)
   272  		}
   273  	}()
   274  
   275  	// Each pass does an UnsafeReset at the end.
   276  	for passWithReset := 0; passWithReset < 2; passWithReset++ {
   277  		// Insert rows.
   278  		for i := 0; i < numRows; i++ {
   279  			for _, rc := range containers {
   280  				err := rc.addRow(ctx, rows[i])
   281  				require.NoError(t, err)
   282  			}
   283  		}
   284  		// We want all the memory to be usable by the cache, so spill to disk.
   285  		if memoryBudget != math.MaxInt64 {
   286  			for _, rc := range containers {
   287  				require.NoError(t, rc.spillToDisk(ctx))
   288  			}
   289  		}
   290  
   291  		// Random access of the inserted rows.
   292  		var accesses [][]int
   293  		for i := 0; i < 2*numRows; i++ {
   294  			var access []int
   295  			for j := 0; j < 4; j++ {
   296  				access = append(access, rng.Intn(numRows))
   297  			}
   298  			accesses = append(accesses, access)
   299  		}
   300  		for _, rc := range containers {
   301  			rc.setupForRead(ctx, accesses)
   302  		}
   303  		for _, access := range accesses {
   304  			for _, index := range access {
   305  				skip := rng.Intn(10) == 0
   306  				var rows [2]sqlbase.EncDatumRow
   307  				for i, rc := range containers {
   308  					row, err := rc.getRow(ctx, index, skip)
   309  					require.NoError(t, err)
   310  					rows[i] = row
   311  				}
   312  				if skip {
   313  					continue
   314  				}
   315  				require.Equal(t, rows[0].String(types), rows[1].String(types))
   316  			}
   317  		}
   318  		// Reset and reorder the rows for the next pass.
   319  		rand.Shuffle(numRows, func(i, j int) {
   320  			rows[i], rows[j] = rows[j], rows[i]
   321  		})
   322  		for _, rc := range containers {
   323  			require.NoError(t, rc.unsafeReset(ctx))
   324  		}
   325  	}
   326  }
   327  
   328  // Adapter interface that can be implemented using both DiskBackedNumberedRowContainer
   329  // and DiskBackedIndexedRowContainer.
   330  type numberedContainer interface {
   331  	addRow(context.Context, sqlbase.EncDatumRow) error
   332  	setupForRead(ctx context.Context, accesses [][]int)
   333  	getRow(ctx context.Context, idx int, skip bool) (sqlbase.EncDatumRow, error)
   334  	spillToDisk(context.Context) error
   335  	unsafeReset(context.Context) error
   336  	close(context.Context)
   337  }
   338  
   339  type numberedContainerUsingNRC struct {
   340  	rc            *DiskBackedNumberedRowContainer
   341  	memoryMonitor *mon.BytesMonitor
   342  }
   343  
   344  func (d numberedContainerUsingNRC) addRow(ctx context.Context, row sqlbase.EncDatumRow) error {
   345  	_, err := d.rc.AddRow(ctx, row)
   346  	return err
   347  }
   348  func (d numberedContainerUsingNRC) setupForRead(ctx context.Context, accesses [][]int) {
   349  	d.rc.SetupForRead(ctx, accesses)
   350  }
   351  func (d numberedContainerUsingNRC) getRow(
   352  	ctx context.Context, idx int, skip bool,
   353  ) (sqlbase.EncDatumRow, error) {
   354  	return d.rc.GetRow(ctx, idx, false)
   355  }
   356  func (d numberedContainerUsingNRC) spillToDisk(ctx context.Context) error {
   357  	return d.rc.testingSpillToDisk(ctx)
   358  }
   359  func (d numberedContainerUsingNRC) unsafeReset(ctx context.Context) error {
   360  	return d.rc.UnsafeReset(ctx)
   361  }
   362  func (d numberedContainerUsingNRC) close(ctx context.Context) {
   363  	d.rc.Close(ctx)
   364  	d.memoryMonitor.Stop(ctx)
   365  }
   366  func makeNumberedContainerUsingNRC(
   367  	ctx context.Context,
   368  	t testing.TB,
   369  	types []*types.T,
   370  	evalCtx *tree.EvalContext,
   371  	engine diskmap.Factory,
   372  	st *cluster.Settings,
   373  	memoryBudget int64,
   374  	diskMonitor *mon.BytesMonitor,
   375  ) numberedContainerUsingNRC {
   376  	memoryMonitor := makeMemMonitorAndStart(ctx, st, memoryBudget)
   377  	rc := NewDiskBackedNumberedRowContainer(
   378  		false /* deDup */, types, evalCtx, engine, memoryMonitor, diskMonitor, 0 /* rowCapacity */)
   379  	require.NoError(t, rc.testingSpillToDisk(ctx))
   380  	return numberedContainerUsingNRC{rc: rc, memoryMonitor: memoryMonitor}
   381  }
   382  
   383  type numberedContainerUsingIRC struct {
   384  	rc            *DiskBackedIndexedRowContainer
   385  	memoryMonitor *mon.BytesMonitor
   386  }
   387  
   388  func (d numberedContainerUsingIRC) addRow(ctx context.Context, row sqlbase.EncDatumRow) error {
   389  	return d.rc.AddRow(ctx, row)
   390  }
   391  func (d numberedContainerUsingIRC) setupForRead(context.Context, [][]int) {}
   392  func (d numberedContainerUsingIRC) getRow(
   393  	ctx context.Context, idx int, skip bool,
   394  ) (sqlbase.EncDatumRow, error) {
   395  	if skip {
   396  		return nil, nil
   397  	}
   398  	row, err := d.rc.GetRow(ctx, idx)
   399  	if err != nil {
   400  		return nil, err
   401  	}
   402  	return row.(IndexedRow).Row, nil
   403  }
   404  func (d numberedContainerUsingIRC) spillToDisk(ctx context.Context) error {
   405  	if d.rc.UsingDisk() {
   406  		return nil
   407  	}
   408  	return d.rc.SpillToDisk(ctx)
   409  }
   410  func (d numberedContainerUsingIRC) unsafeReset(ctx context.Context) error {
   411  	return d.rc.UnsafeReset(ctx)
   412  }
   413  func (d numberedContainerUsingIRC) close(ctx context.Context) {
   414  	d.rc.Close(ctx)
   415  	d.memoryMonitor.Stop(ctx)
   416  }
   417  func makeNumberedContainerUsingIRC(
   418  	ctx context.Context,
   419  	t require.TestingT,
   420  	types []*types.T,
   421  	evalCtx *tree.EvalContext,
   422  	engine diskmap.Factory,
   423  	st *cluster.Settings,
   424  	memoryBudget int64,
   425  	diskMonitor *mon.BytesMonitor,
   426  ) numberedContainerUsingIRC {
   427  	memoryMonitor := makeMemMonitorAndStart(ctx, st, memoryBudget)
   428  	rc := NewDiskBackedIndexedRowContainer(
   429  		nil /* ordering */, types, evalCtx, engine, memoryMonitor, diskMonitor, 0 /* rowCapacity */)
   430  	require.NoError(t, rc.SpillToDisk(ctx))
   431  	return numberedContainerUsingIRC{rc: rc, memoryMonitor: memoryMonitor}
   432  }
   433  
   434  func makeMemMonitorAndStart(
   435  	ctx context.Context, st *cluster.Settings, budget int64,
   436  ) *mon.BytesMonitor {
   437  	memoryMonitor := mon.MakeMonitor(
   438  		"test-mem",
   439  		mon.MemoryResource,
   440  		nil,           /* curCount */
   441  		nil,           /* maxHist */
   442  		-1,            /* increment */
   443  		math.MaxInt64, /* noteworthy */
   444  		st,
   445  	)
   446  	memoryMonitor.Start(ctx, nil, mon.MakeStandaloneBudget(budget))
   447  	return &memoryMonitor
   448  }
   449  
   450  // Assume that join is using a batch of 100 left rows.
   451  const leftRowsBatch = 100
   452  
   453  // repeatAccesses is the number of times on average that each right row is accessed.
   454  func generateLookupJoinAccessPattern(
   455  	rng *rand.Rand, rightRowsReadPerLeftRow int, repeatAccesses int,
   456  ) [][]int {
   457  	// Unique rows accessed.
   458  	numRowsAccessed := (leftRowsBatch * rightRowsReadPerLeftRow) / repeatAccesses
   459  	out := make([][]int, leftRowsBatch)
   460  	for i := 0; i < len(out); i++ {
   461  		// Each left row sees a contiguous sequence of rows on the right since the
   462  		// rows are being retrieved and stored in the container in index order.
   463  		start := rng.Intn(numRowsAccessed - rightRowsReadPerLeftRow)
   464  		out[i] = make([]int, rightRowsReadPerLeftRow)
   465  		for j := start; j < start+rightRowsReadPerLeftRow; j++ {
   466  			out[i][j-start] = j
   467  		}
   468  	}
   469  	return out
   470  }
   471  
   472  // numRightRows is the number of rows in the container, of which a certain
   473  // fraction of rows are accessed randomly (when using an inverted index for
   474  // intersection the result set can be sparse).
   475  // repeatAccesses is the number of times on average that each right row is accessed.
   476  func generateInvertedJoinAccessPattern(
   477  	b *testing.B, rng *rand.Rand, numRightRows int, rightRowsReadPerLeftRow int, repeatAccesses int,
   478  ) [][]int {
   479  	// Unique rows accessed.
   480  	numRowsAccessed := (leftRowsBatch * rightRowsReadPerLeftRow) / repeatAccesses
   481  	// Don't want each left row to access most of the right rows.
   482  	require.True(b, rightRowsReadPerLeftRow < numRowsAccessed/2)
   483  	accessedIndexes := make(map[int]struct{})
   484  	for len(accessedIndexes) < numRowsAccessed {
   485  		accessedIndexes[rng.Intn(numRightRows)] = struct{}{}
   486  	}
   487  	accessedRightRows := make([]int, 0, numRowsAccessed)
   488  	for k := range accessedIndexes {
   489  		accessedRightRows = append(accessedRightRows, k)
   490  	}
   491  	out := make([][]int, leftRowsBatch)
   492  	for i := 0; i < len(out); i++ {
   493  		out[i] = make([]int, 0, rightRowsReadPerLeftRow)
   494  		uniqueRows := make(map[int]struct{})
   495  		for len(uniqueRows) < rightRowsReadPerLeftRow {
   496  			idx := rng.Intn(len(accessedRightRows))
   497  			if _, notUnique := uniqueRows[idx]; notUnique {
   498  				continue
   499  			}
   500  			uniqueRows[idx] = struct{}{}
   501  			out[i] = append(out[i], accessedRightRows[idx])
   502  		}
   503  		// Sort since accesses by a left row are in ascending order.
   504  		sort.Slice(out[i], func(a, b int) bool {
   505  			return out[i][a] < out[i][b]
   506  		})
   507  	}
   508  	return out
   509  }
   510  
   511  func accessPatternForBenchmarkIterations(totalAccesses int, accessPattern [][]int) [][]int {
   512  	var out [][]int
   513  	var i, j int
   514  	for count := 0; count < totalAccesses; {
   515  		if i >= len(accessPattern) {
   516  			i = 0
   517  			continue
   518  		}
   519  		if j >= len(accessPattern[i]) {
   520  			j = 0
   521  			i++
   522  			continue
   523  		}
   524  		if j == 0 {
   525  			out = append(out, []int(nil))
   526  		}
   527  		last := len(out) - 1
   528  		out[last] = append(out[last], accessPattern[i][j])
   529  		count++
   530  		j++
   531  	}
   532  	return out
   533  }
   534  
   535  func BenchmarkNumberedContainerIteratorCaching(b *testing.B) {
   536  	const numRows = 10000
   537  
   538  	ctx := context.Background()
   539  	st := cluster.MakeTestingClusterSettings()
   540  	evalCtx := tree.MakeTestingEvalContext(st)
   541  	tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.TempStorageConfig{InMemory: true}, base.DefaultTestStoreSpec)
   542  	if err != nil {
   543  		b.Fatal(err)
   544  	}
   545  	defer tempEngine.Close()
   546  
   547  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   548  	defer diskMonitor.Stop(ctx)
   549  
   550  	// Each row is 10 string columns. Each string has a mean length of 5, and the
   551  	// row encoded into bytes is ~64 bytes. So we approximate ~512 rows per ssblock.
   552  	// The in-memory decoded footprint in the cache is ~780 bytes.
   553  	var typs []*types.T
   554  	for i := 0; i < 10; i++ {
   555  		typs = append(typs, types.String)
   556  	}
   557  	rng, _ := randutil.NewPseudoRand()
   558  	rows := make([]sqlbase.EncDatumRow, numRows)
   559  	for i := 0; i < numRows; i++ {
   560  		rows[i] = make([]sqlbase.EncDatum, len(typs))
   561  		for j := range typs {
   562  			rows[i][j] = sqlbase.DatumToEncDatum(typs[j], sqlbase.RandDatum(rng, typs[j], false))
   563  		}
   564  	}
   565  
   566  	type accessPattern struct {
   567  		joinType string
   568  		paramStr string
   569  		accesses [][]int
   570  	}
   571  	var accessPatterns []accessPattern
   572  	// Lookup join access patterns. The highest number of unique rows accessed is
   573  	// when rightRowsReadPerLeftRow = 64 and repeatAccesses = 1, which with a left
   574  	// batch of 100 is 100 * 64 / 1 = 6400 rows accessed. The container has
   575  	// 10000 rows. If N unique rows are accessed these form a prefix of the rows
   576  	// in the container.
   577  	for _, rightRowsReadPerLeftRow := range []int{1, 2, 4, 8, 16, 32, 64} {
   578  		for _, repeatAccesses := range []int{1, 2} {
   579  			accessPatterns = append(accessPatterns, accessPattern{
   580  				joinType: "lookup-join",
   581  				paramStr: fmt.Sprintf("matchRatio=%d/repeatAccesses=%d",
   582  					rightRowsReadPerLeftRow, repeatAccesses),
   583  				accesses: generateLookupJoinAccessPattern(rng, rightRowsReadPerLeftRow, repeatAccesses),
   584  			})
   585  		}
   586  	}
   587  	// Inverted join access patterns.
   588  	// With a left batch of 100 rows, and rightRowsReadPerLeftRow = (25, 50, 100), the
   589  	// total accesses are (2500, 5000, 10000). Consider repeatAccesses = 2: the unique
   590  	// rows accessed are (1250, 2500, 5000), which will be randomly distributed over the
   591  	// 10000 rows.
   592  	for _, rightRowsReadPerLeftRow := range []int{1, 25, 50, 100} {
   593  		for _, repeatAccesses := range []int{1, 2, 4, 8} {
   594  			accessPatterns = append(accessPatterns, accessPattern{
   595  				joinType: "inverted-join",
   596  				paramStr: fmt.Sprintf("matchRatio=%d/repeatAccesses=%d",
   597  					rightRowsReadPerLeftRow, repeatAccesses),
   598  				accesses: generateInvertedJoinAccessPattern(
   599  					b, rng, numRows, rightRowsReadPerLeftRow, repeatAccesses),
   600  			})
   601  		}
   602  	}
   603  
   604  	// Observed cache behavior for a particular access pattern for each kind of
   605  	// join, to give some insight into performance.
   606  	// - The inverted join pattern has poor locality and the IndexedRowContainer
   607  	//   does poorly. The NumberedRowContainer is able to use the knowledge that
   608  	//   many rows will never be accessed.
   609  	//                         11000   100KB   500KB   2.5MB
   610  	//   IndexedRowContainer   0.00    0.00    0.00    0.00
   611  	//   NumberedRowContainer  0.22    0.68    0.88    1.00
   612  	// - The lookup join access pattern and observed hit rates. The better
   613  	//   locality improves the behavior of the IndexedRowContainer, but it
   614  	//   is still significantly worse than the NumberedRowContainer.
   615  	//                         11000   100KB   500KB   2.5MB
   616  	//   IndexedRowContainer   0.00    0.00    0.10    0.35
   617  	//   NumberedRowContainer  0.01    0.09    0.28    0.63
   618  
   619  	for _, pattern := range accessPatterns {
   620  		// Approx cache capacity in rows with these settings: 13, 132, 666, 3300.
   621  		for _, memoryBudget := range []int64{11000, 100 << 10, 500 << 10, 2500 << 10} {
   622  			for _, containerKind := range []string{"indexed", "numbered"} {
   623  				b.Run(fmt.Sprintf("%s/%s/mem=%d/%s", pattern.joinType, pattern.paramStr, memoryBudget,
   624  					containerKind), func(b *testing.B) {
   625  					var nc numberedContainer
   626  					switch containerKind {
   627  					case "indexed":
   628  						nc = makeNumberedContainerUsingIRC(
   629  							ctx, b, typs, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
   630  					case "numbered":
   631  						nc = makeNumberedContainerUsingNRC(
   632  							ctx, b, typs, &evalCtx, tempEngine, st, memoryBudget, diskMonitor)
   633  					}
   634  					defer nc.close(ctx)
   635  					for i := 0; i < len(rows); i++ {
   636  						require.NoError(b, nc.addRow(ctx, rows[i]))
   637  					}
   638  					accesses := accessPatternForBenchmarkIterations(b.N, pattern.accesses)
   639  					b.ResetTimer()
   640  					nc.setupForRead(ctx, accesses)
   641  					for i := 0; i < len(accesses); i++ {
   642  						for j := 0; j < len(accesses[i]); j++ {
   643  							if _, err := nc.getRow(ctx, accesses[i][j], false /* skip */); err != nil {
   644  								b.Fatal(err)
   645  							}
   646  						}
   647  					}
   648  					b.StopTimer()
   649  					// Disabled code block. Change to true to look at hit ratio and cache sizes
   650  					// for these benchmarks.
   651  					if false {
   652  						// Print statements for understanding the performance differences.
   653  						fmt.Printf("\n**%s/%s/%d/%s: iters: %d\n", pattern.joinType, pattern.paramStr, memoryBudget, containerKind, b.N)
   654  						switch rc := nc.(type) {
   655  						case numberedContainerUsingNRC:
   656  							fmt.Printf("hit rate: %.2f, maxCacheSize: %d\n",
   657  								float64(rc.rc.rowIter.hitCount)/float64(rc.rc.rowIter.missCount+rc.rc.rowIter.hitCount),
   658  								rc.rc.rowIter.maxCacheSize)
   659  						case numberedContainerUsingIRC:
   660  							fmt.Printf("hit rate: %.2f, maxCacheSize: %d\n",
   661  								float64(rc.rc.hitCount)/float64(rc.rc.missCount+rc.rc.hitCount),
   662  								rc.rc.maxCacheSize)
   663  						}
   664  					}
   665  				})
   666  			}
   667  		}
   668  	}
   669  }
   670  
   671  // TODO(sumeer):
   672  // - Benchmarks:
   673  //   - de-duping with and without spilling.