github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/sorter_test.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "fmt" 16 "math" 17 "math/rand" 18 "testing" 19 20 "github.com/cockroachdb/cockroach/pkg/base" 21 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 22 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 23 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 24 "github.com/cockroachdb/cockroach/pkg/sql/rowcontainer" 25 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 26 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 27 "github.com/cockroachdb/cockroach/pkg/sql/types" 28 "github.com/cockroachdb/cockroach/pkg/storage" 29 "github.com/cockroachdb/cockroach/pkg/testutils" 30 "github.com/cockroachdb/cockroach/pkg/testutils/distsqlutils" 31 "github.com/cockroachdb/cockroach/pkg/util/encoding" 32 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 33 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 34 ) 35 36 func TestSorter(t *testing.T) { 37 defer leaktest.AfterTest(t)() 38 39 v := [6]sqlbase.EncDatum{} 40 for i := range v { 41 v[i] = sqlbase.IntEncDatum(i) 42 } 43 44 asc := encoding.Ascending 45 desc := encoding.Descending 46 47 testCases := []struct { 48 name string 49 spec execinfrapb.SorterSpec 50 post execinfrapb.PostProcessSpec 51 types []*types.T 52 input sqlbase.EncDatumRows 53 expected sqlbase.EncDatumRows 54 }{ 55 { 56 name: "SortAll", 57 // No specified input ordering and unspecified limit. 58 spec: execinfrapb.SorterSpec{ 59 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 60 sqlbase.ColumnOrdering{ 61 {ColIdx: 0, Direction: asc}, 62 {ColIdx: 1, Direction: desc}, 63 {ColIdx: 2, Direction: asc}, 64 }), 65 }, 66 types: sqlbase.ThreeIntCols, 67 input: sqlbase.EncDatumRows{ 68 {v[1], v[0], v[4]}, 69 {v[3], v[4], v[1]}, 70 {v[4], v[4], v[4]}, 71 {v[3], v[2], v[0]}, 72 {v[4], v[4], v[5]}, 73 {v[3], v[3], v[0]}, 74 {v[0], v[0], v[0]}, 75 }, 76 expected: sqlbase.EncDatumRows{ 77 {v[0], v[0], v[0]}, 78 {v[1], v[0], v[4]}, 79 {v[3], v[4], v[1]}, 80 {v[3], v[3], v[0]}, 81 {v[3], v[2], v[0]}, 82 {v[4], v[4], v[4]}, 83 {v[4], v[4], v[5]}, 84 }, 85 }, { 86 name: "SortLimit", 87 // No specified input ordering but specified limit. 88 spec: execinfrapb.SorterSpec{ 89 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 90 sqlbase.ColumnOrdering{ 91 {ColIdx: 0, Direction: asc}, 92 {ColIdx: 1, Direction: asc}, 93 {ColIdx: 2, Direction: asc}, 94 }), 95 }, 96 post: execinfrapb.PostProcessSpec{Limit: 4}, 97 types: sqlbase.ThreeIntCols, 98 input: sqlbase.EncDatumRows{ 99 {v[3], v[3], v[0]}, 100 {v[3], v[4], v[1]}, 101 {v[1], v[0], v[4]}, 102 {v[0], v[0], v[0]}, 103 {v[4], v[4], v[4]}, 104 {v[4], v[4], v[5]}, 105 {v[3], v[2], v[0]}, 106 }, 107 expected: sqlbase.EncDatumRows{ 108 {v[0], v[0], v[0]}, 109 {v[1], v[0], v[4]}, 110 {v[3], v[2], v[0]}, 111 {v[3], v[3], v[0]}, 112 }, 113 }, { 114 name: "SortOffset", 115 // No specified input ordering but specified offset and limit. 116 spec: execinfrapb.SorterSpec{ 117 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 118 sqlbase.ColumnOrdering{ 119 {ColIdx: 0, Direction: asc}, 120 {ColIdx: 1, Direction: asc}, 121 {ColIdx: 2, Direction: asc}, 122 }), 123 }, 124 post: execinfrapb.PostProcessSpec{Offset: 2, Limit: 2}, 125 types: sqlbase.ThreeIntCols, 126 input: sqlbase.EncDatumRows{ 127 {v[3], v[3], v[0]}, 128 {v[3], v[4], v[1]}, 129 {v[1], v[0], v[4]}, 130 {v[0], v[0], v[0]}, 131 {v[4], v[4], v[4]}, 132 {v[4], v[4], v[5]}, 133 {v[3], v[2], v[0]}, 134 }, 135 expected: sqlbase.EncDatumRows{ 136 {v[3], v[2], v[0]}, 137 {v[3], v[3], v[0]}, 138 }, 139 }, { 140 name: "SortFilterExpr", 141 // No specified input ordering but specified postprocess filter expression. 142 spec: execinfrapb.SorterSpec{ 143 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 144 sqlbase.ColumnOrdering{ 145 {ColIdx: 0, Direction: asc}, 146 {ColIdx: 1, Direction: asc}, 147 {ColIdx: 2, Direction: asc}, 148 }), 149 }, 150 post: execinfrapb.PostProcessSpec{Filter: execinfrapb.Expression{Expr: "@1 + @2 < 7"}}, 151 types: sqlbase.ThreeIntCols, 152 input: sqlbase.EncDatumRows{ 153 {v[3], v[3], v[0]}, 154 {v[3], v[4], v[1]}, 155 {v[1], v[0], v[4]}, 156 {v[0], v[0], v[0]}, 157 {v[4], v[4], v[4]}, 158 {v[4], v[4], v[5]}, 159 {v[3], v[2], v[0]}, 160 }, 161 expected: sqlbase.EncDatumRows{ 162 {v[0], v[0], v[0]}, 163 {v[1], v[0], v[4]}, 164 {v[3], v[2], v[0]}, 165 {v[3], v[3], v[0]}, 166 }, 167 }, { 168 name: "SortMatchOrderingNoLimit", 169 // Specified match ordering length but no specified limit. 170 spec: execinfrapb.SorterSpec{ 171 OrderingMatchLen: 2, 172 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 173 sqlbase.ColumnOrdering{ 174 {ColIdx: 0, Direction: asc}, 175 {ColIdx: 1, Direction: asc}, 176 {ColIdx: 2, Direction: asc}, 177 }), 178 }, 179 types: sqlbase.ThreeIntCols, 180 input: sqlbase.EncDatumRows{ 181 {v[0], v[1], v[2]}, 182 {v[0], v[1], v[0]}, 183 {v[1], v[0], v[5]}, 184 {v[1], v[1], v[5]}, 185 {v[1], v[1], v[4]}, 186 {v[3], v[4], v[3]}, 187 {v[3], v[4], v[2]}, 188 {v[3], v[5], v[1]}, 189 {v[4], v[4], v[5]}, 190 {v[4], v[4], v[4]}, 191 }, 192 expected: sqlbase.EncDatumRows{ 193 {v[0], v[1], v[0]}, 194 {v[0], v[1], v[2]}, 195 {v[1], v[0], v[5]}, 196 {v[1], v[1], v[4]}, 197 {v[1], v[1], v[5]}, 198 {v[3], v[4], v[2]}, 199 {v[3], v[4], v[3]}, 200 {v[3], v[5], v[1]}, 201 {v[4], v[4], v[4]}, 202 {v[4], v[4], v[5]}, 203 }, 204 }, { 205 name: "SortInputOrderingNoLimit", 206 // Specified input ordering but no specified limit. 207 spec: execinfrapb.SorterSpec{ 208 OrderingMatchLen: 2, 209 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 210 sqlbase.ColumnOrdering{ 211 {ColIdx: 1, Direction: asc}, 212 {ColIdx: 2, Direction: asc}, 213 {ColIdx: 3, Direction: asc}, 214 }), 215 }, 216 types: []*types.T{types.Int, types.Int, types.Int, types.Int}, 217 input: sqlbase.EncDatumRows{ 218 {v[1], v[1], v[2], v[5]}, 219 {v[0], v[1], v[2], v[4]}, 220 {v[0], v[1], v[2], v[3]}, 221 {v[1], v[1], v[2], v[2]}, 222 {v[1], v[2], v[2], v[5]}, 223 {v[0], v[2], v[2], v[4]}, 224 {v[0], v[2], v[2], v[3]}, 225 {v[1], v[2], v[2], v[2]}, 226 }, 227 expected: sqlbase.EncDatumRows{ 228 {v[1], v[1], v[2], v[2]}, 229 {v[0], v[1], v[2], v[3]}, 230 {v[0], v[1], v[2], v[4]}, 231 {v[1], v[1], v[2], v[5]}, 232 {v[1], v[2], v[2], v[2]}, 233 {v[0], v[2], v[2], v[3]}, 234 {v[0], v[2], v[2], v[4]}, 235 {v[1], v[2], v[2], v[5]}, 236 }, 237 }, { 238 name: "SortInputOrderingAlreadySorted", 239 spec: execinfrapb.SorterSpec{ 240 OrderingMatchLen: 2, 241 OutputOrdering: execinfrapb.ConvertToSpecOrdering( 242 sqlbase.ColumnOrdering{ 243 {ColIdx: 1, Direction: asc}, 244 {ColIdx: 2, Direction: asc}, 245 {ColIdx: 3, Direction: asc}, 246 }), 247 }, 248 types: []*types.T{types.Int, types.Int, types.Int, types.Int}, 249 input: sqlbase.EncDatumRows{ 250 {v[1], v[1], v[2], v[2]}, 251 {v[0], v[1], v[2], v[3]}, 252 {v[0], v[1], v[2], v[4]}, 253 {v[1], v[1], v[2], v[5]}, 254 {v[1], v[2], v[2], v[2]}, 255 {v[0], v[2], v[2], v[3]}, 256 {v[0], v[2], v[2], v[4]}, 257 {v[1], v[2], v[2], v[5]}, 258 }, 259 expected: sqlbase.EncDatumRows{ 260 {v[1], v[1], v[2], v[2]}, 261 {v[0], v[1], v[2], v[3]}, 262 {v[0], v[1], v[2], v[4]}, 263 {v[1], v[1], v[2], v[5]}, 264 {v[1], v[2], v[2], v[2]}, 265 {v[0], v[2], v[2], v[3]}, 266 {v[0], v[2], v[2], v[4]}, 267 {v[1], v[2], v[2], v[5]}, 268 }, 269 }, 270 } 271 272 // Test with several memory limits: 273 memLimits := []struct { 274 bytes int64 275 expSpill bool 276 }{ 277 // Use the default limit. 278 {bytes: 0, expSpill: false}, 279 // Immediately switch to disk. 280 {bytes: 1, expSpill: true}, 281 // A memory limit that should not be hit; the processor will 282 // not use disk. 283 {bytes: 1 << 20, expSpill: false}, 284 } 285 286 for _, c := range testCases { 287 t.Run(c.name, func(t *testing.T) { 288 for _, memLimit := range memLimits { 289 // In theory, SortAllProcessor should be able to handle all sorting 290 // strategies, as the other processors are optimizations. 291 for _, forceSortAll := range []bool{false, true} { 292 name := fmt.Sprintf("MemLimit=%d/ForceSort=%t", memLimit.bytes, forceSortAll) 293 t.Run(name, func(t *testing.T) { 294 ctx := context.Background() 295 st := cluster.MakeTestingClusterSettings() 296 tempEngine, _, err := storage.NewTempEngine(ctx, storage.DefaultStorageEngine, base.DefaultTestTempStorageConfig(st), base.DefaultTestStoreSpec) 297 if err != nil { 298 t.Fatal(err) 299 } 300 defer tempEngine.Close() 301 302 evalCtx := tree.MakeTestingEvalContext(st) 303 defer evalCtx.Stop(ctx) 304 diskMonitor := execinfra.NewTestDiskMonitor(ctx, st) 305 defer diskMonitor.Stop(ctx) 306 flowCtx := execinfra.FlowCtx{ 307 EvalCtx: &evalCtx, 308 Cfg: &execinfra.ServerConfig{ 309 Settings: cluster.MakeTestingClusterSettings(), 310 TempStorage: tempEngine, 311 DiskMonitor: diskMonitor, 312 }, 313 } 314 // Override the default memory limit. This will result in using 315 // a memory row container which will hit this limit and fall 316 // back to using a disk row container. 317 flowCtx.Cfg.TestingKnobs.MemoryLimitBytes = memLimit.bytes 318 319 in := distsqlutils.NewRowBuffer(c.types, c.input, distsqlutils.RowBufferArgs{}) 320 out := &distsqlutils.RowBuffer{} 321 322 var s execinfra.Processor 323 if !forceSortAll { 324 var err error 325 s, err = newSorter(context.Background(), &flowCtx, 0 /* processorID */, &c.spec, in, &c.post, out) 326 if err != nil { 327 t.Fatal(err) 328 } 329 } else { 330 var err error 331 s, err = newSortAllProcessor(context.Background(), &flowCtx, 0 /* procedssorID */, &c.spec, in, &c.post, out) 332 if err != nil { 333 t.Fatal(err) 334 } 335 } 336 s.Run(context.Background()) 337 if !out.ProducerClosed() { 338 t.Fatalf("output RowReceiver not closed") 339 } 340 341 var retRows sqlbase.EncDatumRows 342 for { 343 row := out.NextNoMeta(t) 344 if row == nil { 345 break 346 } 347 retRows = append(retRows, row) 348 } 349 350 expStr := c.expected.String(c.types) 351 retStr := retRows.String(c.types) 352 if expStr != retStr { 353 t.Errorf("invalid results; expected:\n %s\ngot:\n %s", 354 expStr, retStr) 355 } 356 357 // Check whether the DiskBackedRowContainer spilled to disk. 358 spilled := s.(rowsAccessor).getRows().Spilled() 359 if memLimit.expSpill != spilled { 360 t.Errorf("expected spill to disk=%t, found %t", memLimit.expSpill, spilled) 361 } 362 if spilled { 363 if scp, ok := s.(*sortChunksProcessor); ok { 364 if scp.rows.(*rowcontainer.DiskBackedRowContainer).UsingDisk() { 365 t.Errorf("expected chunks processor to reset to use memory") 366 } 367 } 368 } 369 }) 370 } 371 } 372 }) 373 } 374 } 375 376 // TestSortInvalidLimit verifies that a top-k sorter will never be created with 377 // an invalid k-parameter. 378 func TestSortInvalidLimit(t *testing.T) { 379 defer leaktest.AfterTest(t)() 380 381 spec := execinfrapb.SorterSpec{} 382 383 t.Run("KTooLarge", func(t *testing.T) { 384 post := execinfrapb.PostProcessSpec{} 385 post.Limit = math.MaxInt64 386 post.Offset = math.MaxInt64 + 1 387 // All arguments apart from spec and post are not necessary. 388 if _, err := newSorter( 389 context.Background(), nil, 0, &spec, nil, &post, nil, 390 ); !testutils.IsError(err, "too large") { 391 t.Fatalf("unexpected error %v, expected k too large", err) 392 } 393 }) 394 395 t.Run("KZero", func(t *testing.T) { 396 var k uint64 397 // All arguments apart from spec and post are not necessary. 398 if _, err := newSortTopKProcessor( 399 nil, 0, &spec, nil, nil, nil, k, 400 ); !testutils.IsError(err, errSortTopKZeroK.Error()) { 401 t.Fatalf("unexpected error %v, expected %v", err, errSortTopKZeroK) 402 } 403 }) 404 } 405 406 var twoColOrdering = execinfrapb.ConvertToSpecOrdering(sqlbase.ColumnOrdering{ 407 {ColIdx: 0, Direction: encoding.Ascending}, 408 {ColIdx: 1, Direction: encoding.Ascending}, 409 }) 410 411 // BenchmarkSortAll times how long it takes to sort an input of varying length. 412 func BenchmarkSortAll(b *testing.B) { 413 const numCols = 2 414 415 ctx := context.Background() 416 st := cluster.MakeTestingClusterSettings() 417 evalCtx := tree.MakeTestingEvalContext(st) 418 defer evalCtx.Stop(ctx) 419 diskMonitor := execinfra.NewTestDiskMonitor(ctx, st) 420 defer diskMonitor.Stop(ctx) 421 flowCtx := execinfra.FlowCtx{ 422 EvalCtx: &evalCtx, 423 Cfg: &execinfra.ServerConfig{ 424 Settings: st, 425 DiskMonitor: diskMonitor, 426 }, 427 } 428 429 rng := rand.New(rand.NewSource(timeutil.Now().UnixNano())) 430 spec := execinfrapb.SorterSpec{OutputOrdering: twoColOrdering} 431 post := execinfrapb.PostProcessSpec{} 432 433 for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} { 434 b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) { 435 input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeRandIntRows(rng, numRows, numCols)) 436 b.SetBytes(int64(numRows * numCols * 8)) 437 b.ResetTimer() 438 for i := 0; i < b.N; i++ { 439 s, err := newSorter( 440 context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{}, 441 ) 442 if err != nil { 443 b.Fatal(err) 444 } 445 s.Run(context.Background()) 446 input.Reset() 447 } 448 }) 449 } 450 } 451 452 // BenchmarkSortLimit times how long it takes to sort a fixed size input with 453 // varying limits. 454 func BenchmarkSortLimit(b *testing.B) { 455 const numCols = 2 456 457 ctx := context.Background() 458 st := cluster.MakeTestingClusterSettings() 459 evalCtx := tree.MakeTestingEvalContext(st) 460 defer evalCtx.Stop(ctx) 461 diskMonitor := execinfra.NewTestDiskMonitor(ctx, st) 462 defer diskMonitor.Stop(ctx) 463 flowCtx := execinfra.FlowCtx{ 464 EvalCtx: &evalCtx, 465 Cfg: &execinfra.ServerConfig{ 466 Settings: st, 467 DiskMonitor: diskMonitor, 468 }, 469 } 470 471 rng := rand.New(rand.NewSource(timeutil.Now().UnixNano())) 472 spec := execinfrapb.SorterSpec{OutputOrdering: twoColOrdering} 473 474 const numRows = 1 << 16 475 b.Run(fmt.Sprintf("rows=%d", numRows), func(b *testing.B) { 476 input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, sqlbase.MakeRandIntRows(rng, numRows, numCols)) 477 for _, limit := range []uint64{1 << 4, 1 << 8, 1 << 12, 1 << 16} { 478 post := execinfrapb.PostProcessSpec{Limit: limit} 479 b.Run(fmt.Sprintf("Limit=%d", limit), func(b *testing.B) { 480 b.SetBytes(int64(numRows * numCols * 8)) 481 b.ResetTimer() 482 for i := 0; i < b.N; i++ { 483 s, err := newSorter( 484 context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{}, 485 ) 486 if err != nil { 487 b.Fatal(err) 488 } 489 s.Run(context.Background()) 490 input.Reset() 491 } 492 }) 493 494 } 495 }) 496 } 497 498 // BenchmarkSortChunks times how long it takes to sort an input which is already 499 // sorted on a prefix. 500 func BenchmarkSortChunks(b *testing.B) { 501 const numCols = 2 502 503 ctx := context.Background() 504 st := cluster.MakeTestingClusterSettings() 505 evalCtx := tree.MakeTestingEvalContext(st) 506 defer evalCtx.Stop(ctx) 507 diskMonitor := execinfra.NewTestDiskMonitor(ctx, st) 508 defer diskMonitor.Stop(ctx) 509 flowCtx := execinfra.FlowCtx{ 510 EvalCtx: &evalCtx, 511 Cfg: &execinfra.ServerConfig{ 512 Settings: st, 513 DiskMonitor: diskMonitor, 514 }, 515 } 516 517 rng := rand.New(rand.NewSource(timeutil.Now().UnixNano())) 518 spec := execinfrapb.SorterSpec{ 519 OutputOrdering: twoColOrdering, 520 OrderingMatchLen: 1, 521 } 522 post := execinfrapb.PostProcessSpec{} 523 524 for _, numRows := range []int{1 << 4, 1 << 8, 1 << 12, 1 << 16} { 525 for chunkSize := 1; chunkSize <= numRows; chunkSize *= 4 { 526 b.Run(fmt.Sprintf("rows=%d,ChunkSize=%d", numRows, chunkSize), func(b *testing.B) { 527 rows := sqlbase.MakeRandIntRows(rng, numRows, numCols) 528 for i, row := range rows { 529 row[0] = sqlbase.IntEncDatum(i / chunkSize) 530 } 531 input := execinfra.NewRepeatableRowSource(sqlbase.TwoIntCols, rows) 532 b.SetBytes(int64(numRows * numCols * 8)) 533 b.ResetTimer() 534 for i := 0; i < b.N; i++ { 535 s, err := newSorter(context.Background(), &flowCtx, 0 /* processorID */, &spec, input, &post, &rowDisposer{}) 536 if err != nil { 537 b.Fatal(err) 538 } 539 s.Run(context.Background()) 540 input.Reset() 541 } 542 }) 543 } 544 } 545 }