github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sorter_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package rowexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"testing"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/base"
    21  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    28  	"github.com/cockroachdb/cockroach/pkg/storage"
    29  	"github.com/cockroachdb/cockroach/pkg/testutils"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils"
    31  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    32  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    33  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    34  )
    35  
    36  func TestSorter(t *testing.T) {
    37  	defer leaktest.AfterTest(t)()
    38  
    39  	v := [6]sqlbase.EncDatum{}
    40  	for i := range v {
    41  		v[i] = sqlbase.IntEncDatum(i)
    42  	}
    43  
    44  	asc := encoding.Ascending
    45  	desc := encoding.Descending
    46  
    47  	testCases := []struct {
    48  		name     string
    49  		spec     execinfrapb.SorterSpec
    50  		post     execinfrapb.PostProcessSpec
    51  		types    []*types.T
    52  		input    sqlbase.EncDatumRows
    53  		expected sqlbase.EncDatumRows
    54  	}{
    55  		{
    56  			name: "SortAll",
    57  			// No specified input ordering and unspecified limit.
    58  			spec: execinfrapb.SorterSpec{
    59  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
    60  					sqlbase.ColumnOrdering{
    61  						{ColIdx: 0, Direction: asc},
    62  						{ColIdx: 1, Direction: desc},
    63  						{ColIdx: 2, Direction: asc},
    64  					}),
    65  			},
    66  			types: sqlbase.ThreeIntCols,
    67  			input: sqlbase.EncDatumRows{
    68  				{v[1], v[0], v[4]},
    69  				{v[3], v[4], v[1]},
    70  				{v[4], v[4], v[4]},
    71  				{v[3], v[2], v[0]},
    72  				{v[4], v[4], v[5]},
    73  				{v[3], v[3], v[0]},
    74  				{v[0], v[0], v[0]},
    75  			},
    76  			expected: sqlbase.EncDatumRows{
    77  				{v[0], v[0], v[0]},
    78  				{v[1], v[0], v[4]},
    79  				{v[3], v[4], v[1]},
    80  				{v[3], v[3], v[0]},
    81  				{v[3], v[2], v[0]},
    82  				{v[4], v[4], v[4]},
    83  				{v[4], v[4], v[5]},
    84  			},
    85  		}, {
    86  			name: "SortLimit",
    87  			// No specified input ordering but specified limit.
    88  			spec: execinfrapb.SorterSpec{
    89  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
    90  					sqlbase.ColumnOrdering{
    91  						{ColIdx: 0, Direction: asc},
    92  						{ColIdx: 1, Direction: asc},
    93  						{ColIdx: 2, Direction: asc},
    94  					}),
    95  			},
    96  			post:  execinfrapb.PostProcessSpec{Limit: 4},
    97  			types: sqlbase.ThreeIntCols,
    98  			input: sqlbase.EncDatumRows{
    99  				{v[3], v[3], v[0]},
   100  				{v[3], v[4], v[1]},
   101  				{v[1], v[0], v[4]},
   102  				{v[0], v[0], v[0]},
   103  				{v[4], v[4], v[4]},
   104  				{v[4], v[4], v[5]},
   105  				{v[3], v[2], v[0]},
   106  			},
   107  			expected: sqlbase.EncDatumRows{
   108  				{v[0], v[0], v[0]},
   109  				{v[1], v[0], v[4]},
   110  				{v[3], v[2], v[0]},
   111  				{v[3], v[3], v[0]},
   112  			},
   113  		}, {
   114  			name: "SortOffset",
   115  			// No specified input ordering but specified offset and limit.
   116  			spec: execinfrapb.SorterSpec{
   117  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
   118  					sqlbase.ColumnOrdering{
   119  						{ColIdx: 0, Direction: asc},
   120  						{ColIdx: 1, Direction: asc},
   121  						{ColIdx: 2, Direction: asc},
   122  					}),
   123  			},
   124  			post:  execinfrapb.PostProcessSpec{Offset: 2, Limit: 2},
   125  			types: sqlbase.ThreeIntCols,
   126  			input: sqlbase.EncDatumRows{
   127  				{v[3], v[3], v[0]},
   128  				{v[3], v[4], v[1]},
   129  				{v[1], v[0], v[4]},
   130  				{v[0], v[0], v[0]},
   131  				{v[4], v[4], v[4]},
   132  				{v[4], v[4], v[5]},
   133  				{v[3], v[2], v[0]},
   134  			},
   135  			expected: sqlbase.EncDatumRows{
   136  				{v[3], v[2], v[0]},
   137  				{v[3], v[3], v[0]},
   138  			},
   139  		}, {
   140  			name: "SortFilterExpr",
   141  			// No specified input ordering but specified postprocess filter expression.
   142  			spec: execinfrapb.SorterSpec{
   143  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
   144  					sqlbase.ColumnOrdering{
   145  						{ColIdx: 0, Direction: asc},
   146  						{ColIdx: 1, Direction: asc},
   147  						{ColIdx: 2, Direction: asc},
   148  					}),
   149  			},
   150  			post:  execinfrapb.PostProcessSpec{Filter: execinfrapb.Expression{Expr: "@1 + @2 < 7"}},
   151  			types: sqlbase.ThreeIntCols,
   152  			input: sqlbase.EncDatumRows{
   153  				{v[3], v[3], v[0]},
   154  				{v[3], v[4], v[1]},
   155  				{v[1], v[0], v[4]},
   156  				{v[0], v[0], v[0]},
   157  				{v[4], v[4], v[4]},
   158  				{v[4], v[4], v[5]},
   159  				{v[3], v[2], v[0]},
   160  			},
   161  			expected: sqlbase.EncDatumRows{
   162  				{v[0], v[0], v[0]},
   163  				{v[1], v[0], v[4]},
   164  				{v[3], v[2], v[0]},
   165  				{v[3], v[3], v[0]},
   166  			},
   167  		}, {
   168  			name: "SortMatchOrderingNoLimit",
   169  			// Specified match ordering length but no specified limit.
   170  			spec: execinfrapb.SorterSpec{
   171  				OrderingMatchLen: 2,
   172  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
   173  					sqlbase.ColumnOrdering{
   174  						{ColIdx: 0, Direction: asc},
   175  						{ColIdx: 1, Direction: asc},
   176  						{ColIdx: 2, Direction: asc},
   177  					}),
   178  			},
   179  			types: sqlbase.ThreeIntCols,
   180  			input: sqlbase.EncDatumRows{
   181  				{v[0], v[1], v[2]},
   182  				{v[0], v[1], v[0]},
   183  				{v[1], v[0], v[5]},
   184  				{v[1], v[1], v[5]},
   185  				{v[1], v[1], v[4]},
   186  				{v[3], v[4], v[3]},
   187  				{v[3], v[4], v[2]},
   188  				{v[3], v[5], v[1]},
   189  				{v[4], v[4], v[5]},
   190  				{v[4], v[4], v[4]},
   191  			},
   192  			expected: sqlbase.EncDatumRows{
   193  				{v[0], v[1], v[0]},
   194  				{v[0], v[1], v[2]},
   195  				{v[1], v[0], v[5]},
   196  				{v[1], v[1], v[4]},
   197  				{v[1], v[1], v[5]},
   198  				{v[3], v[4], v[2]},
   199  				{v[3], v[4], v[3]},
   200  				{v[3], v[5], v[1]},
   201  				{v[4], v[4], v[4]},
   202  				{v[4], v[4], v[5]},
   203  			},
   204  		}, {
   205  			name: "SortInputOrderingNoLimit",
   206  			// Specified input ordering but no specified limit.
   207  			spec: execinfrapb.SorterSpec{
   208  				OrderingMatchLen: 2,
   209  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
   210  					sqlbase.ColumnOrdering{
   211  						{ColIdx: 1, Direction: asc},
   212  						{ColIdx: 2, Direction: asc},
   213  						{ColIdx: 3, Direction: asc},
   214  					}),
   215  			},
   216  			types: []*types.T{types.Int, types.Int, types.Int, types.Int},
   217  			input: sqlbase.EncDatumRows{
   218  				{v[1], v[1], v[2], v[5]},
   219  				{v[0], v[1], v[2], v[4]},
   220  				{v[0], v[1], v[2], v[3]},
   221  				{v[1], v[1], v[2], v[2]},
   222  				{v[1], v[2], v[2], v[5]},
   223  				{v[0], v[2], v[2], v[4]},
   224  				{v[0], v[2], v[2], v[3]},
   225  				{v[1], v[2], v[2], v[2]},
   226  			},
   227  			expected: sqlbase.EncDatumRows{
   228  				{v[1], v[1], v[2], v[2]},
   229  				{v[0], v[1], v[2], v[3]},
   230  				{v[0], v[1], v[2], v[4]},
   231  				{v[1], v[1], v[2], v[5]},
   232  				{v[1], v[2], v[2], v[2]},
   233  				{v[0], v[2], v[2], v[3]},
   234  				{v[0], v[2], v[2], v[4]},
   235  				{v[1], v[2], v[2], v[5]},
   236  			},
   237  		}, {
   238  			name: "SortInputOrderingAlreadySorted",
   239  			spec: execinfrapb.SorterSpec{
   240  				OrderingMatchLen: 2,
   241  				OutputOrdering: execinfrapb.ConvertToSpecOrdering(
   242  					sqlbase.ColumnOrdering{
   243  						{ColIdx: 1, Direction: asc},
   244  						{ColIdx: 2, Direction: asc},
   245  						{ColIdx: 3, Direction: asc},
   246  					}),
   247  			},
   248  			types: []*types.T{types.Int, types.Int, types.Int, types.Int},
   249  			input: sqlbase.EncDatumRows{
   250  				{v[1], v[1], v[2], v[2]},
   251  				{v[0], v[1], v[2], v[3]},
   252  				{v[0], v[1], v[2], v[4]},
   253  				{v[1], v[1], v[2], v[5]},
   254  				{v[1], v[2], v[2], v[2]},
   255  				{v[0], v[2], v[2], v[3]},
   256  				{v[0], v[2], v[2], v[4]},
   257  				{v[1], v[2], v[2], v[5]},
   258  			},
   259  			expected: sqlbase.EncDatumRows{
   260  				{v[1], v[1], v[2], v[2]},
   261  				{v[0], v[1], v[2], v[3]},
   262  				{v[0], v[1], v[2], v[4]},
   263  				{v[1], v[1], v[2], v[5]},
   264  				{v[1], v[2], v[2], v[2]},
   265  				{v[0], v[2], v[2], v[3]},
   266  				{v[0], v[2], v[2], v[4]},
   267  				{v[1], v[2], v[2], v[5]},
   268  			},
   269  		},
   270  	}
   271  
   272  	// Test with several memory limits:
   273  	memLimits := []struct {
   274  		bytes    int64
   275  		expSpill bool
   276  	}{
   277  		// Use the default limit.
   278  		{bytes: 0, expSpill: false},
   279  		// Immediately switch to disk.
   280  		{bytes: 1, expSpill: true},
   281  		// A memory limit that should not be hit; the processor will
   282  		// not use disk.
   283  		{bytes: 1 << 20, expSpill: false},
   284  	}
   285  
   286  	for _, c := range testCases {
   287  		t.Run(c.name, func(t *testing.T) {
   288  			for _, memLimit := range memLimits {
   289  				// In theory, SortAllProcessor should be able to handle all sorting
   290  				// strategies, as the other processors are optimizations.
   291  				for _, forceSortAll := range []bool{false, true} {
   292  					name := fmt.Sprintf("MemLimit=%d/ForceSort=%t", memLimit.bytes, forceSortAll)
   293  					t.Run(name, func(t *testing.T) {
   294  						ctx := context.Background()
   295  						st := cluster.MakeTestingClusterSettings()
   296  						tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec)
   297  						if err != nil {
   298  							t.Fatal(err)
   299  						}
   300  						defer tempEngine.Close()
   301  
   302  						evalCtx := tree.MakeTestingEvalContext(st)
   303  						defer evalCtx.Stop(ctx)
   304  						diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   305  						defer diskMonitor.Stop(ctx)
   306  						flowCtx := execinfra.FlowCtx{
   307  							EvalCtx: &evalCtx,
   308  							Cfg: &execinfra.ServerConfig{
   309  								Settings:    cluster.MakeTestingClusterSettings(),
   310  								TempStorage: tempEngine,
   311  								DiskMonitor: diskMonitor,
   312  							},
   313  						}
   314  						// Override the default memory limit. This will result in using
   315  						// a memory row container which will hit this limit and fall
   316  						// back to using a disk row container.
   317  						flowCtx.Cfg.TestingKnobs.MemoryLimitBytes = memLimit.bytes
   318  
   319  						in := distsqlutils.NewRowBuffer(c.types, c.input, distsqlutils.RowBufferArgs{})
   320  						out := &distsqlutils.RowBuffer{}
   321  
   322  						var s execinfra.Processor
   323  						if !forceSortAll {
   324  							var err error
   325  							s, err = newSorter(context.Background(), &flowCtx, 0 /* processorID */, &c.spec, in, &c.post, out)
   326  							if err != nil {
   327  								t.Fatal(err)
   328  							}
   329  						} else {
   330  							var err error
   331  							s, err = newSortAllProcessor(context.Background(), &flowCtx, 0 /* procedssorID */, &c.spec, in, &c.post, out)
   332  							if err != nil {
   333  								t.Fatal(err)
   334  							}
   335  						}
   336  						s.Run(context.Background())
   337  						if !out.ProducerClosed() {
   338  							t.Fatalf("output RowReceiver not closed")
   339  						}
   340  
   341  						var retRows sqlbase.EncDatumRows
   342  						for {
   343  							row := out.NextNoMeta(t)
   344  							if row == nil {
   345  								break
   346  							}
   347  							retRows = append(retRows, row)
   348  						}
   349  
   350  						expStr := c.expected.String(c.types)
   351  						retStr := retRows.String(c.types)
   352  						if expStr != retStr {
   353  							t.Errorf("invalid results; expected:\n   %s\ngot:\n   %s",
   354  								expStr, retStr)
   355  						}
   356  
   357  						// Check whether the DiskBackedRowContainer spilled to disk.
   358  						spilled := s.(rowsAccessor).getRows().Spilled()
   359  						if memLimit.expSpill != spilled {
   360  							t.Errorf("expected spill to disk=%t, found %t", memLimit.expSpill, spilled)
   361  						}
   362  						if spilled {
   363  							if scp, ok := s.(*sortChunksProcessor); ok {
   364  								if scp.rows.(*rowcontainer.DiskBackedRowContainer).UsingDisk() {
   365  									t.Errorf("expected chunks processor to reset to use memory")
   366  								}
   367  							}
   368  						}
   369  					})
   370  				}
   371  			}
   372  		})
   373  	}
   374  }
   375  
   376  // TestSortInvalidLimit verifies that a top-k sorter will never be created with
   377  // an invalid k-parameter.
   378  func TestSortInvalidLimit(t *testing.T) {
   379  	defer leaktest.AfterTest(t)()
   380  
   381  	spec := execinfrapb.SorterSpec{}
   382  
   383  	t.Run("KTooLarge", func(t *testing.T) {
   384  		post := execinfrapb.PostProcessSpec{}
   385  		post.Limit = math.MaxInt64
   386  		post.Offset = math.MaxInt64 + 1
   387  		// All arguments apart from spec and post are not necessary.
   388  		if _, err := newSorter(
   389  			context.Background(), nil, 0, &spec, nil, &post, nil,
   390  		); !testutils.IsError(err, "too large") {
   391  			t.Fatalf("unexpected error %v, expected k too large", err)
   392  		}
   393  	})
   394  
   395  	t.Run("KZero", func(t *testing.T) {
   396  		var k uint64
   397  		// All arguments apart from spec and post are not necessary.
   398  		if _, err := newSortTopKProcessor(
   399  			nil, 0, &spec, nil, nil, nil, k,
   400  		); !testutils.IsError(err, errSortTopKZeroK.Error()) {
   401  			t.Fatalf("unexpected error %v, expected %v", err, errSortTopKZeroK)
   402  		}
   403  	})
   404  }
   405  
   406  var twoColOrdering = execinfrapb.ConvertToSpecOrdering(sqlbase.ColumnOrdering{
   407  	{ColIdx: 0, Direction: encoding.Ascending},
   408  	{ColIdx: 1, Direction: encoding.Ascending},
   409  })
   410  
   411  // BenchmarkSortAll times how long it takes to sort an input of varying length.
   412  func BenchmarkSortAll(b *testing.B) {
   413  	const numCols = 2
   414  
   415  	ctx := context.Background()
   416  	st := cluster.MakeTestingClusterSettings()
   417  	evalCtx := tree.MakeTestingEvalContext(st)
   418  	defer evalCtx.Stop(ctx)
   419  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   420  	defer diskMonitor.Stop(ctx)
   421  	flowCtx := execinfra.FlowCtx{
   422  		EvalCtx: &evalCtx,
   423  		Cfg: &execinfra.ServerConfig{
   424  			Settings:    st,
   425  			DiskMonitor: diskMonitor,
   426  		},
   427  	}
   428  
   429  	rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
   430  	spec := execinfrapb.SorterSpec{OutputOrdering: twoColOrdering}
   431  	post := execinfrapb.PostProcessSpec{}
   432  
   433  	for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} {
   434  		b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) {
   435  			input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeRandIntRows(rng, numRows, numCols))
   436  			b.SetBytes(int64(numRows * numCols * 8))
   437  			b.ResetTimer()
   438  			for i := 0; i < b.N; i++ {
   439  				s, err := newSorter(
   440  					context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{},
   441  				)
   442  				if err != nil {
   443  					b.Fatal(err)
   444  				}
   445  				s.Run(context.Background())
   446  				input.Reset()
   447  			}
   448  		})
   449  	}
   450  }
   451  
   452  // BenchmarkSortLimit times how long it takes to sort a fixed size input with
   453  // varying limits.
   454  func BenchmarkSortLimit(b *testing.B) {
   455  	const numCols = 2
   456  
   457  	ctx := context.Background()
   458  	st := cluster.MakeTestingClusterSettings()
   459  	evalCtx := tree.MakeTestingEvalContext(st)
   460  	defer evalCtx.Stop(ctx)
   461  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   462  	defer diskMonitor.Stop(ctx)
   463  	flowCtx := execinfra.FlowCtx{
   464  		EvalCtx: &evalCtx,
   465  		Cfg: &execinfra.ServerConfig{
   466  			Settings:    st,
   467  			DiskMonitor: diskMonitor,
   468  		},
   469  	}
   470  
   471  	rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
   472  	spec := execinfrapb.SorterSpec{OutputOrdering: twoColOrdering}
   473  
   474  	const numRows = 1 << 16
   475  	b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) {
   476  		input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeRandIntRows(rng, numRows, numCols))
   477  		for _, limit := range []uint64{1 << 4, 1 << 8, 1 << 12, 1 << 16} {
   478  			post := execinfrapb.PostProcessSpec{Limit: limit}
   479  			b.Run(fmt.Sprintf("Limit=%d", limit), func(b *testing.B) {
   480  				b.SetBytes(int64(numRows * numCols * 8))
   481  				b.ResetTimer()
   482  				for i := 0; i < b.N; i++ {
   483  					s, err := newSorter(
   484  						context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{},
   485  					)
   486  					if err != nil {
   487  						b.Fatal(err)
   488  					}
   489  					s.Run(context.Background())
   490  					input.Reset()
   491  				}
   492  			})
   493  
   494  		}
   495  	})
   496  }
   497  
   498  // BenchmarkSortChunks times how long it takes to sort an input which is already
   499  // sorted on a prefix.
   500  func BenchmarkSortChunks(b *testing.B) {
   501  	const numCols = 2
   502  
   503  	ctx := context.Background()
   504  	st := cluster.MakeTestingClusterSettings()
   505  	evalCtx := tree.MakeTestingEvalContext(st)
   506  	defer evalCtx.Stop(ctx)
   507  	diskMonitor := execinfra.NewTestDiskMonitor(ctx, st)
   508  	defer diskMonitor.Stop(ctx)
   509  	flowCtx := execinfra.FlowCtx{
   510  		EvalCtx: &evalCtx,
   511  		Cfg: &execinfra.ServerConfig{
   512  			Settings:    st,
   513  			DiskMonitor: diskMonitor,
   514  		},
   515  	}
   516  
   517  	rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
   518  	spec := execinfrapb.SorterSpec{
   519  		OutputOrdering:   twoColOrdering,
   520  		OrderingMatchLen: 1,
   521  	}
   522  	post := execinfrapb.PostProcessSpec{}
   523  
   524  	for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} {
   525  		for chunkSize := 1; chunkSize <= numRows; chunkSize *= 4 {
   526  			b.Run(fmt.Sprintf("rows=%d,ChunkSize=%d", numRows, chunkSize), func(b *testing.B) {
   527  				rows := sqlbase.MakeRandIntRows(rng, numRows, numCols)
   528  				for i, row := range rows {
   529  					row[0] = sqlbase.IntEncDatum(i / chunkSize)
   530  				}
   531  				input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, rows)
   532  				b.SetBytes(int64(numRows * numCols * 8))
   533  				b.ResetTimer()
   534  				for i := 0; i < b.N; i++ {
   535  					s, err := newSorter(context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{})
   536  					if err != nil {
   537  						b.Fatal(err)
   538  					}
   539  					s.Run(context.Background())
   540  					input.Reset()
   541  				}
   542  			})
   543  		}
   544  	}
   545  }