github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/sort_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 "math/rand" 18 "sort" 19 "testing" 20 21 "github.com/cockroachdb/cockroach/pkg/col/coldata" 22 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase/colexecerror" 24 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 25 "github.com/cockroachdb/cockroach/pkg/sql/types" 26 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 27 "github.com/cockroachdb/cockroach/pkg/util/randutil" 28 ) 29 30 var sortAllTestCases []sortTestCase 31 32 func init() { 33 sortAllTestCases = []sortTestCase{ 34 { 35 tuples: tuples{{1}, {2}, {nil}, {4}, {5}, {nil}}, 36 expected: tuples{{nil}, {nil}, {1}, {2}, {4}, {5}}, 37 typs: []*types.T{types.Int}, 38 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 39 }, 40 { 41 tuples: tuples{{1, 2}, {1, 1}, {1, nil}, {2, nil}, {2, 3}, {2, nil}, {5, 1}}, 42 expected: tuples{{1, nil}, {1, 1}, {1, 2}, {2, nil}, {2, nil}, {2, 3}, {5, 1}}, 43 typs: []*types.T{types.Int, types.Int}, 44 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}}, 45 }, 46 { 47 tuples: tuples{{1, 2}, {1, 1}, {1, nil}, {2, nil}, {2, 3}, {2, nil}, {5, 1}}, 48 expected: tuples{{5, 1}, {2, 3}, {2, nil}, {2, nil}, {1, 2}, {1, 1}, {1, nil}}, 49 typs: []*types.T{types.Int, types.Int}, 50 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0, Direction: execinfrapb.Ordering_Column_DESC}, {ColIdx: 1, Direction: execinfrapb.Ordering_Column_DESC}}, 51 }, 52 { 53 tuples: tuples{{nil, nil}, {nil, 3}, {1, nil}, {nil, 1}, {1, 2}, {nil, nil}, {5, nil}}, 54 expected: tuples{{nil, nil}, {nil, nil}, {nil, 1}, {nil, 3}, {1, nil}, {1, 2}, {5, nil}}, 55 typs: []*types.T{types.Int, types.Int}, 56 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}}, 57 }, 58 { 59 tuples: tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}}, 60 expected: tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}}, 61 typs: []*types.T{types.Int}, 62 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 63 }, 64 { 65 tuples: tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}}, 66 expected: tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}}, 67 typs: []*types.T{types.Int}, 68 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 69 }, 70 { 71 tuples: tuples{{1, 1}, {3, 2}, {2, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}}, 72 expected: tuples{{1, 1}, {2, 3}, {3, 2}, {4, 4}, {5, 5}, {6, 6}, {7, 7}}, 73 typs: []*types.T{types.Int, types.Int}, 74 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 75 }, 76 { 77 tuples: tuples{{1, 1}, {5, 2}, {3, 3}, {7, 4}, {2, 5}, {6, 6}, {4, 7}}, 78 expected: tuples{{1, 1}, {2, 5}, {3, 3}, {4, 7}, {5, 2}, {6, 6}, {7, 4}}, 79 typs: []*types.T{types.Int, types.Int}, 80 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 81 }, 82 { 83 tuples: tuples{{1}, {5}, {3}, {3}, {2}, {6}, {4}}, 84 expected: tuples{{1}, {2}, {3}, {3}, {4}, {5}, {6}}, 85 typs: []*types.T{types.Int}, 86 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 87 }, 88 { 89 tuples: tuples{{false}, {true}}, 90 expected: tuples{{false}, {true}}, 91 typs: []*types.T{types.Bool}, 92 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 93 }, 94 { 95 tuples: tuples{{true}, {false}}, 96 expected: tuples{{false}, {true}}, 97 typs: []*types.T{types.Bool}, 98 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 99 }, 100 { 101 tuples: tuples{{3.2}, {2.0}, {2.4}, {math.NaN()}, {math.Inf(-1)}, {math.Inf(1)}}, 102 expected: tuples{{math.NaN()}, {math.Inf(-1)}, {2.0}, {2.4}, {3.2}, {math.Inf(1)}}, 103 typs: []*types.T{types.Float}, 104 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}}, 105 }, 106 107 { 108 tuples: tuples{{0, 1, 0}, {1, 2, 0}, {2, 3, 2}, {3, 7, 1}, {4, 2, 2}}, 109 expected: tuples{{0, 1, 0}, {1, 2, 0}, {3, 7, 1}, {4, 2, 2}, {2, 3, 2}}, 110 typs: []*types.T{types.Int, types.Int, types.Int}, 111 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 2}, {ColIdx: 1}}, 112 }, 113 114 { 115 // ensure that sort partitions stack: make sure that a run of identical 116 // values in a later column doesn't get sorted if the run is broken up 117 // by previous columns. 118 tuples: tuples{ 119 {0, 1, 0}, 120 {0, 1, 0}, 121 {0, 1, 1}, 122 {0, 0, 1}, 123 {0, 0, 0}, 124 }, 125 expected: tuples{ 126 {0, 0, 0}, 127 {0, 0, 1}, 128 {0, 1, 0}, 129 {0, 1, 0}, 130 {0, 1, 1}, 131 }, 132 typs: []*types.T{types.Int, types.Int, types.Int}, 133 ordCols: []execinfrapb.Ordering_Column{{ColIdx: 0}, {ColIdx: 1}, {ColIdx: 2}}, 134 }, 135 } 136 } 137 138 func TestSort(t *testing.T) { 139 defer leaktest.AfterTest(t)() 140 for _, tc := range sortAllTestCases { 141 runTestsWithTyps(t, []tuples{tc.tuples}, [][]*types.T{tc.typs}, tc.expected, orderedVerifier, 142 func(input []colexecbase.Operator) (colexecbase.Operator, error) { 143 return NewSorter(testAllocator, input[0], tc.typs, tc.ordCols) 144 }) 145 } 146 } 147 148 func TestSortRandomized(t *testing.T) { 149 defer leaktest.AfterTest(t)() 150 rng, _ := randutil.NewPseudoRand() 151 nTups := coldata.BatchSize()*2 + 1 152 maxCols := 3 153 // TODO(yuzefovich): randomize types as well. 154 typs := make([]*types.T, maxCols) 155 for i := range typs { 156 typs[i] = types.Int 157 } 158 for nCols := 1; nCols < maxCols; nCols++ { 159 for nOrderingCols := 1; nOrderingCols <= nCols; nOrderingCols++ { 160 for _, k := range []int{0, rng.Intn(nTups) + 1} { 161 topK := k != 0 162 name := fmt.Sprintf("nCols=%d/nOrderingCols=%d/topK=%t", nCols, nOrderingCols, topK) 163 t.Run(name, func(t *testing.T) { 164 tups, expected, ordCols := generateRandomDataForTestSort(rng, nTups, nCols, nOrderingCols) 165 if topK { 166 expected = expected[:k] 167 } 168 runTests(t, []tuples{tups}, expected, orderedVerifier, func(input []colexecbase.Operator) (colexecbase.Operator, error) { 169 if topK { 170 return NewTopKSorter(testAllocator, input[0], typs[:nCols], ordCols, k), nil 171 } 172 return NewSorter(testAllocator, input[0], typs[:nCols], ordCols) 173 }) 174 }) 175 } 176 } 177 } 178 } 179 180 // generateRandomDataForTestSort is a utility function that generates data to 181 // be used in randomized unit test of a sort operation. It returns: 182 // - tups - the data to be sorted 183 // - expected - the same data but already sorted 184 // - ordCols - ordering columns used in the sort operation. 185 func generateRandomDataForTestSort( 186 rng *rand.Rand, nTups, nCols, nOrderingCols int, 187 ) (tups, expected tuples, ordCols []execinfrapb.Ordering_Column) { 188 ordCols = generateColumnOrdering(rng, nCols, nOrderingCols) 189 tups = make(tuples, nTups) 190 for i := range tups { 191 tups[i] = make(tuple, nCols) 192 for j := range tups[i] { 193 // Small range so we can test partitioning 194 if rng.Float64() < nullProbability { 195 tups[i][j] = nil 196 } else { 197 tups[i][j] = rng.Int63() % 2048 198 } 199 } 200 // Enforce that the last ordering column is always unique. Otherwise 201 // there would be multiple valid sort orders. 202 tups[i][ordCols[nOrderingCols-1].ColIdx] = int64(i) 203 } 204 205 expected = make(tuples, nTups) 206 copy(expected, tups) 207 sort.Slice(expected, less(expected, ordCols)) 208 return tups, expected, ordCols 209 } 210 211 func TestAllSpooler(t *testing.T) { 212 defer leaktest.AfterTest(t)() 213 214 tcs := []struct { 215 tuples tuples 216 typ []*types.T 217 }{ 218 { 219 tuples: tuples{{1}, {2}, {3}, {4}, {5}, {6}, {7}}, 220 typ: []*types.T{types.Int}, 221 }, 222 { 223 tuples: tuples{{1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}, {1}}, 224 typ: []*types.T{types.Int}, 225 }, 226 { 227 tuples: tuples{{1, 1}, {3, 2}, {2, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}}, 228 typ: []*types.T{types.Int, types.Int}, 229 }, 230 { 231 tuples: tuples{{1, 1}, {5, 2}, {3, 3}, {7, 4}, {2, 5}, {6, 6}, {4, 7}}, 232 typ: []*types.T{types.Int, types.Int}, 233 }, 234 { 235 tuples: tuples{{1}, {5}, {3}, {3}, {2}, {6}, {4}}, 236 typ: []*types.T{types.Int}, 237 }, 238 { 239 tuples: tuples{{0, 1, 0}, {1, 2, 0}, {2, 3, 2}, {3, 7, 1}, {4, 2, 2}}, 240 typ: []*types.T{types.Int, types.Int, types.Int}, 241 }, 242 { 243 tuples: tuples{ 244 {0, 1, 0}, 245 {0, 1, 0}, 246 {0, 1, 1}, 247 {0, 0, 1}, 248 {0, 0, 0}, 249 }, 250 typ: []*types.T{types.Int, types.Int, types.Int}, 251 }, 252 } 253 for _, tc := range tcs { 254 runTestsWithFn(t, []tuples{tc.tuples}, nil /* typs */, func(t *testing.T, input []colexecbase.Operator) { 255 allSpooler := newAllSpooler(testAllocator, input[0], tc.typ) 256 allSpooler.init() 257 allSpooler.spool(context.Background()) 258 if len(tc.tuples) != allSpooler.getNumTuples() { 259 t.Fatal(fmt.Sprintf("allSpooler spooled wrong number of tuples: expected %d, but received %d", len(tc.tuples), allSpooler.getNumTuples())) 260 } 261 if allSpooler.getPartitionsCol() != nil { 262 t.Fatal("allSpooler returned non-nil partitionsCol") 263 } 264 for col := 0; col < len(tc.typ); col++ { 265 colVec := allSpooler.getValues(col).Int64() 266 for i := 0; i < allSpooler.getNumTuples(); i++ { 267 if colVec[i] != int64(tc.tuples[i][col].(int)) { 268 t.Fatal(fmt.Sprintf("allSpooler returned wrong value in %d column of %d'th tuple : expected %v, but received %v", 269 col, i, tc.tuples[i][col].(int), colVec[i])) 270 } 271 } 272 } 273 }) 274 } 275 } 276 277 func BenchmarkSort(b *testing.B) { 278 rng, _ := randutil.NewPseudoRand() 279 ctx := context.Background() 280 k := 128 281 282 for _, nBatches := range []int{1 << 1, 1 << 4, 1 << 8} { 283 for _, nCols := range []int{1, 2, 4} { 284 for _, topK := range []bool{false, true} { 285 name := fmt.Sprintf("rows=%d/cols=%d/topK=%t", nBatches*coldata.BatchSize(), nCols, topK) 286 b.Run(name, func(b *testing.B) { 287 // 8 (bytes / int64) * nBatches (number of batches) * coldata.BatchSize() (rows / 288 // batch) * nCols (number of columns / row). 289 b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols)) 290 typs := make([]*types.T, nCols) 291 for i := range typs { 292 typs[i] = types.Int 293 } 294 batch := testAllocator.NewMemBatch(typs) 295 batch.SetLength(coldata.BatchSize()) 296 ordCols := make([]execinfrapb.Ordering_Column, nCols) 297 for i := range ordCols { 298 ordCols[i].ColIdx = uint32(i) 299 ordCols[i].Direction = execinfrapb.Ordering_Column_Direction(rng.Int() % 2) 300 301 col := batch.ColVec(i).Int64() 302 for j := 0; j < coldata.BatchSize(); j++ { 303 col[j] = rng.Int63() % int64((i*1024)+1) 304 } 305 } 306 b.ResetTimer() 307 for n := 0; n < b.N; n++ { 308 source := newFiniteBatchSource(batch, typs, nBatches) 309 var sorter colexecbase.Operator 310 if topK { 311 sorter = NewTopKSorter(testAllocator, source, typs, ordCols, k) 312 } else { 313 var err error 314 sorter, err = NewSorter(testAllocator, source, typs, ordCols) 315 if err != nil { 316 b.Fatal(err) 317 } 318 } 319 sorter.Init() 320 for out := sorter.Next(ctx); out.Length() != 0; out = sorter.Next(ctx) { 321 } 322 } 323 }) 324 } 325 } 326 } 327 } 328 329 func BenchmarkAllSpooler(b *testing.B) { 330 rng, _ := randutil.NewPseudoRand() 331 ctx := context.Background() 332 333 for _, nBatches := range []int{1 << 1, 1 << 4, 1 << 8} { 334 for _, nCols := range []int{1, 2, 4} { 335 b.Run(fmt.Sprintf("rows=%d/cols=%d", nBatches*coldata.BatchSize(), nCols), func(b *testing.B) { 336 // 8 (bytes / int64) * nBatches (number of batches) * col.BatchSize() (rows / 337 // batch) * nCols (number of columns / row). 338 b.SetBytes(int64(8 * nBatches * coldata.BatchSize() * nCols)) 339 typs := make([]*types.T, nCols) 340 for i := range typs { 341 typs[i] = types.Int 342 } 343 batch := testAllocator.NewMemBatch(typs) 344 batch.SetLength(coldata.BatchSize()) 345 for i := 0; i < nCols; i++ { 346 col := batch.ColVec(i).Int64() 347 for j := 0; j < coldata.BatchSize(); j++ { 348 col[j] = rng.Int63() % int64((i*1024)+1) 349 } 350 } 351 b.ResetTimer() 352 for n := 0; n < b.N; n++ { 353 source := newFiniteBatchSource(batch, typs, nBatches) 354 allSpooler := newAllSpooler(testAllocator, source, typs) 355 allSpooler.init() 356 allSpooler.spool(ctx) 357 } 358 }) 359 } 360 } 361 } 362 363 func less(tuples tuples, ordCols []execinfrapb.Ordering_Column) func(i, j int) bool { 364 return func(i, j int) bool { 365 for _, col := range ordCols { 366 n1 := tuples[i][col.ColIdx] == nil 367 n2 := tuples[j][col.ColIdx] == nil 368 if col.Direction == execinfrapb.Ordering_Column_ASC { 369 if n1 && n2 { 370 continue 371 } else if n1 { 372 return true 373 } else if n2 { 374 return false 375 } 376 } else { 377 if n1 && n2 { 378 continue 379 } else if n1 { 380 return false 381 } else if n2 { 382 return true 383 } 384 } 385 if tuples[i][col.ColIdx].(int64) < tuples[j][col.ColIdx].(int64) { 386 return col.Direction == execinfrapb.Ordering_Column_ASC 387 } else if tuples[i][col.ColIdx].(int64) > tuples[j][col.ColIdx].(int64) { 388 return col.Direction == execinfrapb.Ordering_Column_DESC 389 } 390 } 391 return false 392 } 393 } 394 395 // generateColumnOrdering produces a random ordering of nOrderingCols columns 396 // on a table with nCols columns, so nOrderingCols must be not greater than 397 // nCols. 398 func generateColumnOrdering( 399 rng *rand.Rand, nCols int, nOrderingCols int, 400 ) []execinfrapb.Ordering_Column { 401 if nOrderingCols > nCols { 402 colexecerror.InternalError("nOrderingCols > nCols in generateColumnOrdering") 403 } 404 orderingCols := make([]execinfrapb.Ordering_Column, nOrderingCols) 405 for i, col := range rng.Perm(nCols)[:nOrderingCols] { 406 orderingCols[i] = execinfrapb.Ordering_Column{ColIdx: uint32(col), Direction: execinfrapb.Ordering_Column_Direction(rng.Intn(2))} 407 } 408 return orderingCols 409 }