github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort_test.go

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"sort"
    19  	"testing"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    26  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    27  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    28  )
    29  
    30  var sortAllTestCases []sortTestCase
    31  
    32  func init() {
    33  	sortAllTestCases = []sortTestCase{
    34  		{
    35  			tuples:   tuples{{1}, {2}, {nil}, {4}, {5}, {nil}},
    36  			expected: tuples{{nil}, {nil}, {1}, {2}, {4}, {5}},
    37  			typs:     []*types.T{types.Int},
    38  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    39  		},
    40  		{
    41  			tuples:   tuples{{1, 2}, {1, 1}, {1, nil}, {2, nil}, {2, 3}, {2, nil}, {5, 1}},
    42  			expected: tuples{{1, nil}, {1, 1}, {1, 2}, {2, nil}, {2, nil}, {2, 3}, {5, 1}},
    43  			typs:     []*types.T{types.Int, types.Int},
    44  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}},
    45  		},
    46  		{
    47  			tuples:   tuples{{1, 2}, {1, 1}, {1, nil}, {2, nil}, {2, 3}, {2, nil}, {5, 1}},
    48  			expected: tuples{{5, 1}, {2, 3}, {2, nil}, {2, nil}, {1, 2}, {1, 1}, {1, nil}},
    49  			typs:     []*types.T{types.Int, types.Int},
    50  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0, Direction: execinfrapb.Ordering_Column_DESC}, {ColIdx: 1, Direction: execinfrapb.Ordering_Column_DESC}},
    51  		},
    52  		{
    53  			tuples:   tuples{{nil, nil}, {nil, 3}, {1, nil}, {nil, 1}, {1, 2}, {nil, nil}, {5, nil}},
    54  			expected: tuples{{nil, nil}, {nil, nil}, {nil, 1}, {nil, 3}, {1, nil}, {1, 2}, {5, nil}},
    55  			typs:     []*types.T{types.Int, types.Int},
    56  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}},
    57  		},
    58  		{
    59  			tuples:   tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}},
    60  			expected: tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}},
    61  			typs:     []*types.T{types.Int},
    62  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    63  		},
    64  		{
    65  			tuples:   tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}},
    66  			expected: tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}},
    67  			typs:     []*types.T{types.Int},
    68  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    69  		},
    70  		{
    71  			tuples:   tuples{{1, 1}, {3, 2}, {2, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}},
    72  			expected: tuples{{1, 1}, {2, 3}, {3, 2}, {4, 4}, {5, 5}, {6, 6}, {7, 7}},
    73  			typs:     []*types.T{types.Int, types.Int},
    74  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    75  		},
    76  		{
    77  			tuples:   tuples{{1, 1}, {5, 2}, {3, 3}, {7, 4}, {2, 5}, {6, 6}, {4, 7}},
    78  			expected: tuples{{1, 1}, {2, 5}, {3, 3}, {4, 7}, {5, 2}, {6, 6}, {7, 4}},
    79  			typs:     []*types.T{types.Int, types.Int},
    80  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    81  		},
    82  		{
    83  			tuples:   tuples{{1}, {5}, {3}, {3}, {2}, {6}, {4}},
    84  			expected: tuples{{1}, {2}, {3}, {3}, {4}, {5}, {6}},
    85  			typs:     []*types.T{types.Int},
    86  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    87  		},
    88  		{
    89  			tuples:   tuples{{false}, {true}},
    90  			expected: tuples{{false}, {true}},
    91  			typs:     []*types.T{types.Bool},
    92  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    93  		},
    94  		{
    95  			tuples:   tuples{{true}, {false}},
    96  			expected: tuples{{false}, {true}},
    97  			typs:     []*types.T{types.Bool},
    98  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
    99  		},
   100  		{
   101  			tuples:   tuples{{3.2}, {2.0}, {2.4}, {math.NaN()}, {math.Inf(-1)}, {math.Inf(1)}},
   102  			expected: tuples{{math.NaN()}, {math.Inf(-1)}, {2.0}, {2.4}, {3.2}, {math.Inf(1)}},
   103  			typs:     []*types.T{types.Float},
   104  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}},
   105  		},
   106  
   107  		{
   108  			tuples:   tuples{{0, 1, 0}, {1, 2, 0}, {2, 3, 2}, {3, 7, 1}, {4, 2, 2}},
   109  			expected: tuples{{0, 1, 0}, {1, 2, 0}, {3, 7, 1}, {4, 2, 2}, {2, 3, 2}},
   110  			typs:     []*types.T{types.Int, types.Int, types.Int},
   111  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 1}},
   112  		},
   113  
   114  		{
   115  			// ensure that sort partitions stack: make sure that a run of identical
   116  			// values in a later column doesn't get sorted if the run is broken up
   117  			// by previous columns.
   118  			tuples: tuples{
   119  				{0, 1, 0},
   120  				{0, 1, 0},
   121  				{0, 1, 1},
   122  				{0, 0, 1},
   123  				{0, 0, 0},
   124  			},
   125  			expected: tuples{
   126  				{0, 0, 0},
   127  				{0, 0, 1},
   128  				{0, 1, 0},
   129  				{0, 1, 0},
   130  				{0, 1, 1},
   131  			},
   132  			typs:    []*types.T{types.Int, types.Int, types.Int},
   133  			ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}},
   134  		},
   135  	}
   136  }
   137  
   138  func TestSort(t *testing.T) {
   139  	defer leaktest.AfterTest(t)()
   140  	for _, tc := range sortAllTestCases {
   141  		runTestsWithTyps(t, []tuples{tc.tuples}, [][]*types.T{tc.typs}, tc.expected, orderedVerifier,
   142  			func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   143  				return NewSorter(testAllocator, input[0], tc.typs, tc.ordCols)
   144  			})
   145  	}
   146  }
   147  
   148  func TestSortRandomized(t *testing.T) {
   149  	defer leaktest.AfterTest(t)()
   150  	rng, _ := randutil.NewPseudoRand()
   151  	nTups := coldata.BatchSize()*2 + 1
   152  	maxCols := 3
   153  	// TODO(yuzefovich): randomize types as well.
   154  	typs := make([]*types.T, maxCols)
   155  	for i := range typs {
   156  		typs[i] = types.Int
   157  	}
   158  	for nCols := 1; nCols < maxCols; nCols++ {
   159  		for nOrderingCols := 1; nOrderingCols <= nCols; nOrderingCols++ {
   160  			for _, k := range []int{0, rng.Intn(nTups) + 1} {
   161  				topK := k != 0
   162  				name := fmt.Sprintf("nCols=%d/nOrderingCols=%d/topK=%t", nCols, nOrderingCols, topK)
   163  				t.Run(name, func(t *testing.T) {
   164  					tups, expected, ordCols := generateRandomDataForTestSort(rng, nTups, nCols, nOrderingCols)
   165  					if topK {
   166  						expected = expected[:k]
   167  					}
   168  					runTests(t, []tuples{tups}, expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   169  						if topK {
   170  							return NewTopKSorter(testAllocator, input[0], typs[:nCols], ordCols, k), nil
   171  						}
   172  						return NewSorter(testAllocator, input[0], typs[:nCols], ordCols)
   173  					})
   174  				})
   175  			}
   176  		}
   177  	}
   178  }
   179  
   180  // generateRandomDataForTestSort is a utility function that generates data to
   181  // be used in randomized unit test of a sort operation. It returns:
   182  // - tups - the data to be sorted
   183  // - expected - the same data but already sorted
   184  // - ordCols - ordering columns used in the sort operation.
   185  func generateRandomDataForTestSort(
   186  	rng *rand.Rand, nTups, nCols, nOrderingCols int,
   187  ) (tups, expected tuples, ordCols []execinfrapb.Ordering_Column) {
   188  	ordCols = generateColumnOrdering(rng, nCols, nOrderingCols)
   189  	tups = make(tuples, nTups)
   190  	for i := range tups {
   191  		tups[i] = make(tuple, nCols)
   192  		for j := range tups[i] {
   193  			// Small range so we can test partitioning
   194  			if rng.Float64() < nullProbability {
   195  				tups[i][j] = nil
   196  			} else {
   197  				tups[i][j] = rng.Int63() % 2048
   198  			}
   199  		}
   200  		// Enforce that the last ordering column is always unique. Otherwise
   201  		// there would be multiple valid sort orders.
   202  		tups[i][ordCols[nOrderingCols-1].ColIdx] = int64(i)
   203  	}
   204  
   205  	expected = make(tuples, nTups)
   206  	copy(expected, tups)
   207  	sort.Slice(expected, less(expected, ordCols))
   208  	return tups, expected, ordCols
   209  }
   210  
   211  func TestAllSpooler(t *testing.T) {
   212  	defer leaktest.AfterTest(t)()
   213  
   214  	tcs := []struct {
   215  		tuples tuples
   216  		typ    []*types.T
   217  	}{
   218  		{
   219  			tuples: tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}},
   220  			typ:    []*types.T{types.Int},
   221  		},
   222  		{
   223  			tuples: tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}},
   224  			typ:    []*types.T{types.Int},
   225  		},
   226  		{
   227  			tuples: tuples{{1, 1}, {3, 2}, {2, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}},
   228  			typ:    []*types.T{types.Int, types.Int},
   229  		},
   230  		{
   231  			tuples: tuples{{1, 1}, {5, 2}, {3, 3}, {7, 4}, {2, 5}, {6, 6}, {4, 7}},
   232  			typ:    []*types.T{types.Int, types.Int},
   233  		},
   234  		{
   235  			tuples: tuples{{1}, {5}, {3}, {3}, {2}, {6}, {4}},
   236  			typ:    []*types.T{types.Int},
   237  		},
   238  		{
   239  			tuples: tuples{{0, 1, 0}, {1, 2, 0}, {2, 3, 2}, {3, 7, 1}, {4, 2, 2}},
   240  			typ:    []*types.T{types.Int, types.Int, types.Int},
   241  		},
   242  		{
   243  			tuples: tuples{
   244  				{0, 1, 0},
   245  				{0, 1, 0},
   246  				{0, 1, 1},
   247  				{0, 0, 1},
   248  				{0, 0, 0},
   249  			},
   250  			typ: []*types.T{types.Int, types.Int, types.Int},
   251  		},
   252  	}
   253  	for _, tc := range tcs {
   254  		runTestsWithFn(t, []tuples{tc.tuples}, nil /* typs */, func(t *testing.T, input []colexecbase.Operator) {
   255  			allSpooler := newAllSpooler(testAllocator, input[0], tc.typ)
   256  			allSpooler.init()
   257  			allSpooler.spool(context.Background())
   258  			if len(tc.tuples) != allSpooler.getNumTuples() {
   259  				t.Fatal(fmt.Sprintf("allSpooler spooled wrong number of tuples: expected %d, but received %d", len(tc.tuples), allSpooler.getNumTuples()))
   260  			}
   261  			if allSpooler.getPartitionsCol() != nil {
   262  				t.Fatal("allSpooler returned non-nil partitionsCol")
   263  			}
   264  			for col := 0; col < len(tc.typ); col++ {
   265  				colVec := allSpooler.getValues(col).Int64()
   266  				for i := 0; i < allSpooler.getNumTuples(); i++ {
   267  					if colVec[i] != int64(tc.tuples[i][col].(int)) {
   268  						t.Fatal(fmt.Sprintf("allSpooler returned wrong value in %d column of %d'th tuple : expected %v, but received %v",
   269  							col, i, tc.tuples[i][col].(int), colVec[i]))
   270  					}
   271  				}
   272  			}
   273  		})
   274  	}
   275  }
   276  
   277  func BenchmarkSort(b *testing.B) {
   278  	rng, _ := randutil.NewPseudoRand()
   279  	ctx := context.Background()
   280  	k := 128
   281  
   282  	for _, nBatches := range []int{1 << 1, 1 << 4, 1 << 8} {
   283  		for _, nCols := range []int{1, 2, 4} {
   284  			for _, topK := range []bool{false, true} {
   285  				name := fmt.Sprintf("rows=%d/cols=%d/topK=%t", nBatches*coldata.BatchSize(), nCols, topK)
   286  				b.Run(name, func(b *testing.B) {
   287  					// 8 (bytes / int64) * nBatches (number of batches) * coldata.BatchSize() (rows /
   288  					// batch) * nCols (number of columns / row).
   289  					b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols))
   290  					typs := make([]*types.T, nCols)
   291  					for i := range typs {
   292  						typs[i] = types.Int
   293  					}
   294  					batch := testAllocator.NewMemBatch(typs)
   295  					batch.SetLength(coldata.BatchSize())
   296  					ordCols := make([]execinfrapb.Ordering_Column, nCols)
   297  					for i := range ordCols {
   298  						ordCols[i].ColIdx = uint32(i)
   299  						ordCols[i].Direction = execinfrapb.Ordering_Column_Direction(rng.Int() % 2)
   300  
   301  						col := batch.ColVec(i).Int64()
   302  						for j := 0; j < coldata.BatchSize(); j++ {
   303  							col[j] = rng.Int63() % int64((i*1024)+1)
   304  						}
   305  					}
   306  					b.ResetTimer()
   307  					for n := 0; n < b.N; n++ {
   308  						source := newFiniteBatchSource(batch, typs, nBatches)
   309  						var sorter colexecbase.Operator
   310  						if topK {
   311  							sorter = NewTopKSorter(testAllocator, source, typs, ordCols, k)
   312  						} else {
   313  							var err error
   314  							sorter, err = NewSorter(testAllocator, source, typs, ordCols)
   315  							if err != nil {
   316  								b.Fatal(err)
   317  							}
   318  						}
   319  						sorter.Init()
   320  						for out := sorter.Next(ctx); out.Length() != 0; out = sorter.Next(ctx) {
   321  						}
   322  					}
   323  				})
   324  			}
   325  		}
   326  	}
   327  }
   328  
   329  func BenchmarkAllSpooler(b *testing.B) {
   330  	rng, _ := randutil.NewPseudoRand()
   331  	ctx := context.Background()
   332  
   333  	for _, nBatches := range []int{1 << 1, 1 << 4, 1 << 8} {
   334  		for _, nCols := range []int{1, 2, 4} {
   335  			b.Run(fmt.Sprintf("rows=%d/cols=%d", nBatches*coldata.BatchSize(), nCols), func(b *testing.B) {
   336  				// 8 (bytes / int64) * nBatches (number of batches) * col.BatchSize() (rows /
   337  				// batch) * nCols (number of columns / row).
   338  				b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols))
   339  				typs := make([]*types.T, nCols)
   340  				for i := range typs {
   341  					typs[i] = types.Int
   342  				}
   343  				batch := testAllocator.NewMemBatch(typs)
   344  				batch.SetLength(coldata.BatchSize())
   345  				for i := 0; i < nCols; i++ {
   346  					col := batch.ColVec(i).Int64()
   347  					for j := 0; j < coldata.BatchSize(); j++ {
   348  						col[j] = rng.Int63() % int64((i*1024)+1)
   349  					}
   350  				}
   351  				b.ResetTimer()
   352  				for n := 0; n < b.N; n++ {
   353  					source := newFiniteBatchSource(batch, typs, nBatches)
   354  					allSpooler := newAllSpooler(testAllocator, source, typs)
   355  					allSpooler.init()
   356  					allSpooler.spool(ctx)
   357  				}
   358  			})
   359  		}
   360  	}
   361  }
   362  
   363  func less(tuples tuples, ordCols []execinfrapb.Ordering_Column) func(i, j int) bool {
   364  	return func(i, j int) bool {
   365  		for _, col := range ordCols {
   366  			n1 := tuples[i][col.ColIdx] == nil
   367  			n2 := tuples[j][col.ColIdx] == nil
   368  			if col.Direction == execinfrapb.Ordering_Column_ASC {
   369  				if n1 && n2 {
   370  					continue
   371  				} else if n1 {
   372  					return true
   373  				} else if n2 {
   374  					return false
   375  				}
   376  			} else {
   377  				if n1 && n2 {
   378  					continue
   379  				} else if n1 {
   380  					return false
   381  				} else if n2 {
   382  					return true
   383  				}
   384  			}
   385  			if tuples[i][col.ColIdx].(int64) < tuples[j][col.ColIdx].(int64) {
   386  				return col.Direction == execinfrapb.Ordering_Column_ASC
   387  			} else if tuples[i][col.ColIdx].(int64) > tuples[j][col.ColIdx].(int64) {
   388  				return col.Direction == execinfrapb.Ordering_Column_DESC
   389  			}
   390  		}
   391  		return false
   392  	}
   393  }
   394  
   395  // generateColumnOrdering produces a random ordering of nOrderingCols columns
   396  // on a table with nCols columns, so nOrderingCols must be not greater than
   397  // nCols.
   398  func generateColumnOrdering(
   399  	rng *rand.Rand, nCols int, nOrderingCols int,
   400  ) []execinfrapb.Ordering_Column {
   401  	if nOrderingCols > nCols {
   402  		colexecerror.InternalError("nOrderingCols > nCols in generateColumnOrdering")
   403  	}
   404  	orderingCols := make([]execinfrapb.Ordering_Column, nOrderingCols)
   405  	for i, col := range rng.Perm(nCols)[:nOrderingCols] {
   406  		orderingCols[i] = execinfrapb.Ordering_Column{ColIdx: uint32(col), Direction: execinfrapb.Ordering_Column_Direction(rng.Intn(2))}
   407  	}
   408  	return orderingCols
   409  }