github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort_chunks_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"sort"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    24  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    25  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    26  )
    27  
    28  var sortChunksTestCases []sortTestCase
    29  
    30  func init() {
    31  	sortChunksTestCases = []sortTestCase{
    32  		{
    33  			description: `three chunks`,
    34  			tuples:      tuples{{1, 2}, {1, 2}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, 1}},
    35  			expected:    tuples{{1, 1}, {1, 2}, {1, 2}, {1, 3}, {5, 5}, {6, 1}, {6, 6}},
    36  			typs:        []*types.T{types.Int, types.Int},
    37  			ordCols:     []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}},
    38  			matchLen:    1,
    39  		},
    40  		{
    41  			description: `simple nulls asc`,
    42  			tuples:      tuples{{1, 2}, {1, nil}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, nil}},
    43  			expected:    tuples{{1, nil}, {1, 1}, {1, 2}, {1, 3}, {5, 5}, {6, nil}, {6, 6}},
    44  			typs:        []*types.T{types.Int, types.Int},
    45  			ordCols:     []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}},
    46  			matchLen:    1,
    47  		},
    48  		{
    49  			description: `simple nulls desc`,
    50  			tuples:      tuples{{1, 2}, {1, nil}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, nil}},
    51  			expected:    tuples{{1, 3}, {1, 2}, {1, 1}, {1, nil}, {5, 5}, {6, 6}, {6, nil}},
    52  			typs:        []*types.T{types.Int, types.Int},
    53  			ordCols:     []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1, Direction: execinfrapb.Ordering_Column_DESC}},
    54  			matchLen:    1,
    55  		},
    56  		{
    57  			description: `one chunk, matchLen 1, three ordering columns`,
    58  			tuples: tuples{
    59  				{0, 1, 2},
    60  				{0, 2, 0},
    61  				{0, 1, 0},
    62  				{0, 1, 1},
    63  				{0, 2, 1},
    64  			},
    65  			expected: tuples{
    66  				{0, 1, 0},
    67  				{0, 1, 1},
    68  				{0, 1, 2},
    69  				{0, 2, 0},
    70  				{0, 2, 1},
    71  			},
    72  			typs:     []*types.T{types.Int, types.Int, types.Int},
    73  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}},
    74  			matchLen: 1,
    75  		},
    76  		{
    77  			description: `two chunks, matchLen 1, three ordering columns`,
    78  			tuples: tuples{
    79  				{0, 1, 2},
    80  				{0, 2, 0},
    81  				{0, 1, 0},
    82  				{1, 2, 1},
    83  				{1, 1, 1},
    84  			},
    85  			expected: tuples{
    86  				{0, 1, 0},
    87  				{0, 1, 2},
    88  				{0, 2, 0},
    89  				{1, 1, 1},
    90  				{1, 2, 1},
    91  			},
    92  			typs:     []*types.T{types.Int, types.Int, types.Int},
    93  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}},
    94  			matchLen: 1,
    95  		},
    96  		{
    97  			description: `two chunks, matchLen 2, three ordering columns`,
    98  			tuples: tuples{
    99  				{0, 1, 2},
   100  				{0, 1, 0},
   101  				{0, 1, 1},
   102  				{0, 2, 1},
   103  				{0, 2, 0},
   104  			},
   105  			expected: tuples{
   106  				{0, 1, 0},
   107  				{0, 1, 1},
   108  				{0, 1, 2},
   109  				{0, 2, 0},
   110  				{0, 2, 1},
   111  			},
   112  			typs:     []*types.T{types.Int, types.Int, types.Int},
   113  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}},
   114  			matchLen: 2,
   115  		},
   116  		{
   117  			description: `four chunks, matchLen 2, three ordering columns`,
   118  			tuples: tuples{
   119  				{0, 1, 2},
   120  				{0, 1, 0},
   121  				{0, 2, 0},
   122  				{1, 1, 1},
   123  				{1, 2, 1},
   124  			},
   125  			expected: tuples{
   126  				{0, 1, 0},
   127  				{0, 1, 2},
   128  				{0, 2, 0},
   129  				{1, 1, 1},
   130  				{1, 2, 1},
   131  			},
   132  			typs:     []*types.T{types.Int, types.Int, types.Int},
   133  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}},
   134  			matchLen: 2,
   135  		},
   136  		{
   137  			description: `three chunks, matchLen 1, three ordering columns (reordered)`,
   138  			tuples: tuples{
   139  				{0, 2, 0},
   140  				{0, 1, 0},
   141  				{1, 1, 1},
   142  				{0, 1, 1},
   143  				{0, 1, 2},
   144  			},
   145  			expected: tuples{
   146  				{0, 1, 0},
   147  				{0, 2, 0},
   148  				{0, 1, 1},
   149  				{1, 1, 1},
   150  				{0, 1, 2},
   151  			},
   152  			typs:     []*types.T{types.Int, types.Int, types.Int},
   153  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 1}, {ColIdx: 0}},
   154  			matchLen: 1,
   155  		},
   156  		{
   157  			description: `four chunks, matchLen 2, three ordering columns (reordered)`,
   158  			tuples: tuples{
   159  				{0, 2, 0},
   160  				{0, 1, 0},
   161  				{1, 1, 1},
   162  				{1, 2, 1},
   163  				{0, 1, 2},
   164  				{1, 2, 2},
   165  				{1, 1, 2},
   166  			},
   167  			expected: tuples{
   168  				{0, 1, 0},
   169  				{0, 2, 0},
   170  				{1, 1, 1},
   171  				{1, 2, 1},
   172  				{0, 1, 2},
   173  				{1, 1, 2},
   174  				{1, 2, 2},
   175  			},
   176  			typs:     []*types.T{types.Int, types.Int, types.Int},
   177  			ordCols:  []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 0}, {ColIdx: 1}},
   178  			matchLen: 2,
   179  		},
   180  	}
   181  }
   182  
   183  func TestSortChunks(t *testing.T) {
   184  	defer leaktest.AfterTest(t)()
   185  
   186  	for _, tc := range sortChunksTestCases {
   187  		runTests(t, []tuples{tc.tuples}, tc.expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   188  			return NewSortChunks(testAllocator, input[0], tc.typs, tc.ordCols, tc.matchLen)
   189  		})
   190  	}
   191  }
   192  
   193  func TestSortChunksRandomized(t *testing.T) {
   194  	defer leaktest.AfterTest(t)()
   195  	rng, _ := randutil.NewPseudoRand()
   196  	nTups := 8
   197  	maxCols := 5
   198  	// TODO(yuzefovich): randomize types as well.
   199  	typs := make([]*types.T, maxCols)
   200  	for i := range typs {
   201  		typs[i] = types.Int
   202  	}
   203  
   204  	for nCols := 1; nCols < maxCols; nCols++ {
   205  		for nOrderingCols := 1; nOrderingCols <= nCols; nOrderingCols++ {
   206  			for matchLen := 1; matchLen < nOrderingCols; matchLen++ {
   207  				ordCols := generateColumnOrdering(rng, nCols, nOrderingCols)
   208  				tups := make(tuples, nTups)
   209  				for i := range tups {
   210  					tups[i] = make(tuple, nCols)
   211  					for j := range tups[i] {
   212  						// Small range so we can test partitioning.
   213  						tups[i][j] = rng.Int63() % 2048
   214  					}
   215  				}
   216  
   217  				// Sort tups on the first matchLen columns as needed for sort chunks
   218  				// operator.
   219  				sortedTups := make(tuples, nTups)
   220  				copy(sortedTups, tups)
   221  				sort.Slice(sortedTups, less(sortedTups, ordCols[:matchLen]))
   222  
   223  				// Sort tups on all ordering columns to get the expected results.
   224  				expected := make(tuples, nTups)
   225  				copy(expected, tups)
   226  				sort.Slice(expected, less(expected, ordCols))
   227  
   228  				runTests(t, []tuples{sortedTups}, expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   229  					return NewSortChunks(testAllocator, input[0], typs[:nCols], ordCols, matchLen)
   230  				})
   231  			}
   232  		}
   233  	}
   234  }
   235  
   236  func BenchmarkSortChunks(b *testing.B) {
   237  	rng, _ := randutil.NewPseudoRand()
   238  	ctx := context.Background()
   239  
   240  	sorterConstructors := []func(*colmem.Allocator, colexecbase.Operator, []*types.T, []execinfrapb.Ordering_Column, int) (colexecbase.Operator, error){
   241  		NewSortChunks,
   242  		func(allocator *colmem.Allocator, input colexecbase.Operator, inputTypes []*types.T, orderingCols []execinfrapb.Ordering_Column, _ int) (colexecbase.Operator, error) {
   243  			return NewSorter(allocator, input, inputTypes, orderingCols)
   244  		},
   245  	}
   246  	sorterNames := []string{"CHUNKS", "ALL"}
   247  	for _, nBatches := range []int{1 << 2, 1 << 6} {
   248  		for _, nCols := range []int{2, 4} {
   249  			for _, matchLen := range []int{1, 2, 3} {
   250  				for _, avgChunkSize := range []int{1 << 3, 1 << 7} {
   251  					for sorterIdx, sorterConstructor := range sorterConstructors {
   252  						if matchLen >= nCols {
   253  							continue
   254  						}
   255  						b.Run(
   256  							fmt.Sprintf("%s/rows=%d/cols=%d/matchLen=%d/avgChunkSize=%d",
   257  								sorterNames[sorterIdx], nBatches*coldata.BatchSize(), nCols, matchLen, avgChunkSize),
   258  							func(b *testing.B) {
   259  								// 8 (bytes / int64) * nBatches (number of batches) * coldata.BatchSize() (rows /
   260  								// batch) * nCols (number of columns / row).
   261  								b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols))
   262  								typs := make([]*types.T, nCols)
   263  								for i := range typs {
   264  									typs[i] = types.Int
   265  								}
   266  								batch := testAllocator.NewMemBatch(typs)
   267  								batch.SetLength(coldata.BatchSize())
   268  								ordCols := make([]execinfrapb.Ordering_Column, nCols)
   269  								for i := range ordCols {
   270  									ordCols[i].ColIdx = uint32(i)
   271  									if i < matchLen {
   272  										ordCols[i].Direction = execinfrapb.Ordering_Column_ASC
   273  									} else {
   274  										ordCols[i].Direction = execinfrapb.Ordering_Column_Direction(rng.Int() % 2)
   275  									}
   276  
   277  									col := batch.ColVec(i).Int64()
   278  									col[0] = 0
   279  									for j := 1; j < coldata.BatchSize(); j++ {
   280  										if i < matchLen {
   281  											col[j] = col[j-1]
   282  											if rng.Float64() < 1.0/float64(avgChunkSize) {
   283  												col[j]++
   284  											}
   285  										} else {
   286  											col[j] = rng.Int63() % int64((i*1024)+1)
   287  										}
   288  									}
   289  								}
   290  								b.ResetTimer()
   291  								for n := 0; n < b.N; n++ {
   292  									source := newFiniteChunksSource(batch, typs, nBatches, matchLen)
   293  									sorter, err := sorterConstructor(testAllocator, source, typs, ordCols, matchLen)
   294  									if err != nil {
   295  										b.Fatal(err)
   296  									}
   297  
   298  									sorter.Init()
   299  									for out := sorter.Next(ctx); out.Length() != 0; out = sorter.Next(ctx) {
   300  									}
   301  								}
   302  								b.StopTimer()
   303  							})
   304  					}
   305  				}
   306  			}
   307  		}
   308  	}
   309  }