github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort_chunks_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 "sort" 17 "testing" 18 19 "github.com/cockroachdb/cockroach/pkg/col/coldata" 20 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 21 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 22 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 25 "github.com/cockroachdb/cockroach/pkg/util/randutil" 26 ) 27 28 var sortChunksTestCases []sortTestCase 29 30 func init() { 31 sortChunksTestCases = []sortTestCase{ 32 { 33 description: `three chunks`, 34 tuples: tuples{{1, 2}, {1, 2}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, 1}}, 35 expected: tuples{{1, 1}, {1, 2}, {1, 2}, {1, 3}, {5, 5}, {6, 1}, {6, 6}}, 36 typs: []*types.T{types.Int, types.Int}, 37 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}}, 38 matchLen: 1, 39 }, 40 { 41 description: `simple nulls asc`, 42 tuples: tuples{{1, 2}, {1, nil}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, nil}}, 43 expected: tuples{{1, nil}, {1, 1}, {1, 2}, {1, 3}, {5, 5}, {6, nil}, {6, 6}}, 44 typs: []*types.T{types.Int, types.Int}, 45 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}}, 46 matchLen: 1, 47 }, 48 { 49 description: `simple nulls desc`, 50 tuples: tuples{{1, 2}, {1, nil}, {1, 3}, {1, 1}, {5, 5}, {6, 6}, {6, nil}}, 51 expected: tuples{{1, 3}, {1, 2}, {1, 1}, {1, nil}, {5, 5}, {6, 6}, {6, nil}}, 52 typs: []*types.T{types.Int, types.Int}, 53 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1, Direction: execinfrapb.Ordering_Column_DESC}}, 54 matchLen: 1, 55 }, 56 { 57 description: `one chunk, matchLen 1, three ordering columns`, 58 tuples: tuples{ 59 {0, 1, 2}, 60 {0, 2, 0}, 61 {0, 1, 0}, 62 {0, 1, 1}, 63 {0, 2, 1}, 64 }, 65 expected: tuples{ 66 {0, 1, 0}, 67 {0, 1, 1}, 68 {0, 1, 2}, 69 {0, 2, 0}, 70 {0, 2, 1}, 71 }, 72 typs: []*types.T{types.Int, types.Int, types.Int}, 73 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}}, 74 matchLen: 1, 75 }, 76 { 77 description: `two chunks, matchLen 1, three ordering columns`, 78 tuples: tuples{ 79 {0, 1, 2}, 80 {0, 2, 0}, 81 {0, 1, 0}, 82 {1, 2, 1}, 83 {1, 1, 1}, 84 }, 85 expected: tuples{ 86 {0, 1, 0}, 87 {0, 1, 2}, 88 {0, 2, 0}, 89 {1, 1, 1}, 90 {1, 2, 1}, 91 }, 92 typs: []*types.T{types.Int, types.Int, types.Int}, 93 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}}, 94 matchLen: 1, 95 }, 96 { 97 description: `two chunks, matchLen 2, three ordering columns`, 98 tuples: tuples{ 99 {0, 1, 2}, 100 {0, 1, 0}, 101 {0, 1, 1}, 102 {0, 2, 1}, 103 {0, 2, 0}, 104 }, 105 expected: tuples{ 106 {0, 1, 0}, 107 {0, 1, 1}, 108 {0, 1, 2}, 109 {0, 2, 0}, 110 {0, 2, 1}, 111 }, 112 typs: []*types.T{types.Int, types.Int, types.Int}, 113 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}}, 114 matchLen: 2, 115 }, 116 { 117 description: `four chunks, matchLen 2, three ordering columns`, 118 tuples: tuples{ 119 {0, 1, 2}, 120 {0, 1, 0}, 121 {0, 2, 0}, 122 {1, 1, 1}, 123 {1, 2, 1}, 124 }, 125 expected: tuples{ 126 {0, 1, 0}, 127 {0, 1, 2}, 128 {0, 2, 0}, 129 {1, 1, 1}, 130 {1, 2, 1}, 131 }, 132 typs: []*types.T{types.Int, types.Int, types.Int}, 133 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}}, 134 matchLen: 2, 135 }, 136 { 137 description: `three chunks, matchLen 1, three ordering columns (reordered)`, 138 tuples: tuples{ 139 {0, 2, 0}, 140 {0, 1, 0}, 141 {1, 1, 1}, 142 {0, 1, 1}, 143 {0, 1, 2}, 144 }, 145 expected: tuples{ 146 {0, 1, 0}, 147 {0, 2, 0}, 148 {0, 1, 1}, 149 {1, 1, 1}, 150 {0, 1, 2}, 151 }, 152 typs: []*types.T{types.Int, types.Int, types.Int}, 153 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 1}, {ColIdx: 0}}, 154 matchLen: 1, 155 }, 156 { 157 description: `four chunks, matchLen 2, three ordering columns (reordered)`, 158 tuples: tuples{ 159 {0, 2, 0}, 160 {0, 1, 0}, 161 {1, 1, 1}, 162 {1, 2, 1}, 163 {0, 1, 2}, 164 {1, 2, 2}, 165 {1, 1, 2}, 166 }, 167 expected: tuples{ 168 {0, 1, 0}, 169 {0, 2, 0}, 170 {1, 1, 1}, 171 {1, 2, 1}, 172 {0, 1, 2}, 173 {1, 1, 2}, 174 {1, 2, 2}, 175 }, 176 typs: []*types.T{types.Int, types.Int, types.Int}, 177 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 0}, {ColIdx: 1}}, 178 matchLen: 2, 179 }, 180 } 181 } 182 183 func TestSortChunks(t *testing.T) { 184 defer leaktest.AfterTest(t)() 185 186 for _, tc := range sortChunksTestCases { 187 runTests(t, []tuples{tc.tuples}, tc.expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) { 188 return NewSortChunks(testAllocator, input[0], tc.typs, tc.ordCols, tc.matchLen) 189 }) 190 } 191 } 192 193 func TestSortChunksRandomized(t *testing.T) { 194 defer leaktest.AfterTest(t)() 195 rng, _ := randutil.NewPseudoRand() 196 nTups := 8 197 maxCols := 5 198 // TODO(yuzefovich): randomize types as well. 199 typs := make([]*types.T, maxCols) 200 for i := range typs { 201 typs[i] = types.Int 202 } 203 204 for nCols := 1; nCols < maxCols; nCols++ { 205 for nOrderingCols := 1; nOrderingCols <= nCols; nOrderingCols++ { 206 for matchLen := 1; matchLen < nOrderingCols; matchLen++ { 207 ordCols := generateColumnOrdering(rng, nCols, nOrderingCols) 208 tups := make(tuples, nTups) 209 for i := range tups { 210 tups[i] = make(tuple, nCols) 211 for j := range tups[i] { 212 // Small range so we can test partitioning. 213 tups[i][j] = rng.Int63() % 2048 214 } 215 } 216 217 // Sort tups on the first matchLen columns as needed for sort chunks 218 // operator. 219 sortedTups := make(tuples, nTups) 220 copy(sortedTups, tups) 221 sort.Slice(sortedTups, less(sortedTups, ordCols[:matchLen])) 222 223 // Sort tups on all ordering columns to get the expected results. 224 expected := make(tuples, nTups) 225 copy(expected, tups) 226 sort.Slice(expected, less(expected, ordCols)) 227 228 runTests(t, []tuples{sortedTups}, expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) { 229 return NewSortChunks(testAllocator, input[0], typs[:nCols], ordCols, matchLen) 230 }) 231 } 232 } 233 } 234 } 235 236 func BenchmarkSortChunks(b *testing.B) { 237 rng, _ := randutil.NewPseudoRand() 238 ctx := context.Background() 239 240 sorterConstructors := []func(*colmem.Allocator, colexecbase.Operator, []*types.T, []execinfrapb.Ordering_Column, int) (colexecbase.Operator, error){ 241 NewSortChunks, 242 func(allocator *colmem.Allocator, input colexecbase.Operator, inputTypes []*types.T, orderingCols []execinfrapb.Ordering_Column, _ int) (colexecbase.Operator, error) { 243 return NewSorter(allocator, input, inputTypes, orderingCols) 244 }, 245 } 246 sorterNames := []string{"CHUNKS", "ALL"} 247 for _, nBatches := range []int{1 << 2, 1 << 6} { 248 for _, nCols := range []int{2, 4} { 249 for _, matchLen := range []int{1, 2, 3} { 250 for _, avgChunkSize := range []int{1 << 3, 1 << 7} { 251 for sorterIdx, sorterConstructor := range sorterConstructors { 252 if matchLen >= nCols { 253 continue 254 } 255 b.Run( 256 fmt.Sprintf("%s/rows=%d/cols=%d/matchLen=%d/avgChunkSize=%d", 257 sorterNames[sorterIdx], nBatches*coldata.BatchSize(), nCols, matchLen, avgChunkSize), 258 func(b *testing.B) { 259 // 8 (bytes / int64) * nBatches (number of batches) * coldata.BatchSize() (rows / 260 // batch) * nCols (number of columns / row). 261 b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols)) 262 typs := make([]*types.T, nCols) 263 for i := range typs { 264 typs[i] = types.Int 265 } 266 batch := testAllocator.NewMemBatch(typs) 267 batch.SetLength(coldata.BatchSize()) 268 ordCols := make([]execinfrapb.Ordering_Column, nCols) 269 for i := range ordCols { 270 ordCols[i].ColIdx = uint32(i) 271 if i < matchLen { 272 ordCols[i].Direction = execinfrapb.Ordering_Column_ASC 273 } else { 274 ordCols[i].Direction = execinfrapb.Ordering_Column_Direction(rng.Int() % 2) 275 } 276 277 col := batch.ColVec(i).Int64() 278 col[0] = 0 279 for j := 1; j < coldata.BatchSize(); j++ { 280 if i < matchLen { 281 col[j] = col[j-1] 282 if rng.Float64() < 1.0/float64(avgChunkSize) { 283 col[j]++ 284 } 285 } else { 286 col[j] = rng.Int63() % int64((i*1024)+1) 287 } 288 } 289 } 290 b.ResetTimer() 291 for n := 0; n < b.N; n++ { 292 source := newFiniteChunksSource(batch, typs, nBatches, matchLen) 293 sorter, err := sorterConstructor(testAllocator, source, typs, ordCols, matchLen) 294 if err != nil { 295 b.Fatal(err) 296 } 297 298 sorter.Init() 299 for out := sorter.Next(ctx); out.Length() != 0; out = sorter.Next(ctx) { 300 } 301 } 302 b.StopTimer() 303 }) 304 } 305 } 306 } 307 } 308 } 309 }