github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/aggregator_test.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package rowexec 12 13 import ( 14 "context" 15 "math" 16 "testing" 17 18 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 19 "github.com/cockroachdb/cockroach/pkg/sql/execinfra" 20 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/types" 24 "github.com/cockroachdb/cockroach/pkg/util" 25 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 26 ) 27 28 type aggTestSpec struct { 29 // The name of the aggregate function. 30 fname string 31 distinct bool 32 // The column indices of the arguments to the function. 33 colIdx []uint32 34 filterColIdx *uint32 35 } 36 37 func aggregations(aggTestSpecs []aggTestSpec) []execinfrapb.AggregatorSpec_Aggregation { 38 agg := make([]execinfrapb.AggregatorSpec_Aggregation, len(aggTestSpecs)) 39 for i, spec := range aggTestSpecs { 40 agg[i].Func = execinfrapb.AggregatorSpec_Func(execinfrapb.AggregatorSpec_Func_value[spec.fname]) 41 agg[i].Distinct = spec.distinct 42 agg[i].ColIdx = spec.colIdx 43 agg[i].FilterColIdx = spec.filterColIdx 44 } 45 return agg 46 } 47 48 // TODO(irfansharif): Add tests to verify the following aggregation functions: 49 // AVG 50 // BOOL_AND 51 // BOOL_OR 52 // CONCAT_AGG 53 // JSON_AGG 54 // JSONB_AGG 55 // STDDEV 56 // VARIANCE 57 func TestAggregator(t *testing.T) { 58 defer leaktest.AfterTest(t)() 59 60 var ( 61 col0 = []uint32{0} 62 col1 = []uint32{1} 63 col2 = []uint32{2} 64 filterCol1 uint32 = 1 65 filterCol3 uint32 = 3 66 ) 67 68 testCases := []ProcessorTestCase{ 69 { 70 // SELECT min(@0), max(@0), count(@0), avg(@0), sum(@0), stddev(@0), 71 // variance(@0) GROUP BY [] (no rows). 72 Name: "MinMaxCountAvgSumStddevGroupByNoneNoRows", 73 Input: ProcessorTestCaseRows{ 74 Rows: [][]interface{}{}, 75 Types: sqlbase.MakeIntCols(1), 76 }, 77 Output: ProcessorTestCaseRows{ 78 Rows: [][]interface{}{ 79 {nil, nil, 0, nil, nil, nil, nil}, 80 }, 81 Types: []*types.T{types.Int, types.Int, types.Int, types.Decimal, types.Decimal, types.Decimal, types.Decimal}, 82 }, 83 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 84 Aggregator: &execinfrapb.AggregatorSpec{ 85 Aggregations: aggregations([]aggTestSpec{ 86 {fname: "MIN", colIdx: col0}, 87 {fname: "MAX", colIdx: col0}, 88 {fname: "COUNT", colIdx: col0}, 89 {fname: "AVG", colIdx: col0}, 90 {fname: "SUM", colIdx: col0}, 91 {fname: "STDDEV", colIdx: col0}, 92 {fname: "VARIANCE", colIdx: col0}, 93 }), 94 }, 95 }, 96 }, 97 { 98 // SELECT @2, count(@1), GROUP BY @2. 99 Name: "CountGroupByWithNull", 100 Input: ProcessorTestCaseRows{ 101 Rows: [][]interface{}{ 102 {1, 2}, 103 {3, nil}, 104 {6, 2}, 105 {7, 2}, 106 {8, 4}, 107 }, 108 Types: sqlbase.MakeIntCols(2), 109 }, 110 Output: ProcessorTestCaseRows{ 111 Rows: [][]interface{}{ 112 {nil, 1}, 113 {4, 1}, 114 {2, 3}, 115 }, 116 Types: sqlbase.MakeIntCols(2), 117 }, 118 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 119 Aggregator: &execinfrapb.AggregatorSpec{ 120 GroupCols: col1, 121 Aggregations: aggregations([]aggTestSpec{ 122 {fname: "ANY_NOT_NULL", colIdx: col1}, 123 {fname: "COUNT", colIdx: col0}, 124 }), 125 }, 126 }, 127 }, 128 { 129 // SELECT @2, count(@1), GROUP BY @2. 130 Name: "CountGroupBy", 131 Input: ProcessorTestCaseRows{ 132 Rows: [][]interface{}{ 133 {1, 2}, 134 {3, 4}, 135 {6, 2}, 136 {7, 2}, 137 {8, 4}, 138 }, 139 Types: sqlbase.MakeIntCols(2), 140 }, 141 Output: ProcessorTestCaseRows{ 142 Rows: [][]interface{}{ 143 {4, 2}, 144 {2, 3}, 145 }, 146 Types: sqlbase.MakeIntCols(2), 147 }, 148 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 149 Aggregator: &execinfrapb.AggregatorSpec{ 150 GroupCols: col1, 151 Aggregations: aggregations([]aggTestSpec{ 152 {fname: "ANY_NOT_NULL", colIdx: col1}, 153 {fname: "COUNT", colIdx: col0}, 154 }), 155 }, 156 }, 157 }, 158 { 159 // SELECT @2, count(@1), GROUP BY @2 (ordering: @2+). 160 Name: "CountGroupByOrderBy", 161 Input: ProcessorTestCaseRows{ 162 Rows: [][]interface{}{ 163 {1, 2}, 164 {6, 2}, 165 {7, 2}, 166 {3, 4}, 167 {8, 4}, 168 }, 169 Types: sqlbase.MakeIntCols(2), 170 }, 171 Output: ProcessorTestCaseRows{ 172 Rows: [][]interface{}{ 173 {2, 3}, 174 {4, 2}, 175 }, 176 Types: sqlbase.MakeIntCols(2), 177 }, 178 DisableSort: true, 179 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 180 Aggregator: &execinfrapb.AggregatorSpec{ 181 OrderedGroupCols: col1, 182 GroupCols: col1, 183 Aggregations: aggregations([]aggTestSpec{ 184 {fname: "ANY_NOT_NULL", colIdx: col1}, 185 {fname: "COUNT", colIdx: col0}, 186 }), 187 }, 188 }, 189 }, 190 { 191 // SELECT @2, sum(@1), GROUP BY @2. 192 Name: "SumGroupBy", 193 Input: ProcessorTestCaseRows{ 194 Rows: [][]interface{}{ 195 {1, 2}, 196 {3, 4}, 197 {6, 2}, 198 {7, 2}, 199 {8, 4}, 200 }, 201 Types: sqlbase.MakeIntCols(2), 202 }, 203 Output: ProcessorTestCaseRows{ 204 Rows: [][]interface{}{ 205 {2, 14}, 206 {4, 11}, 207 }, 208 Types: []*types.T{types.Int, types.Decimal}, 209 }, 210 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 211 Aggregator: &execinfrapb.AggregatorSpec{ 212 GroupCols: col1, 213 Aggregations: aggregations([]aggTestSpec{ 214 {fname: "ANY_NOT_NULL", colIdx: col1}, 215 {fname: "SUM", colIdx: col0}, 216 }), 217 }, 218 }, 219 }, 220 { 221 // SELECT @2, sum(@1), GROUP BY @2 (ordering: @2+). 222 Name: "SumGroupByOrderBy", 223 Input: ProcessorTestCaseRows{ 224 Rows: [][]interface{}{ 225 {1, 2}, 226 {6, 2}, 227 {7, 2}, 228 {8, 4}, 229 {3, 4}, 230 }, 231 Types: sqlbase.MakeIntCols(2), 232 }, 233 Output: ProcessorTestCaseRows{ 234 Rows: [][]interface{}{ 235 {2, 14}, 236 {4, 11}, 237 }, 238 Types: []*types.T{types.Int, types.Decimal}, 239 }, 240 DisableSort: true, 241 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 242 Aggregator: &execinfrapb.AggregatorSpec{ 243 GroupCols: col1, 244 OrderedGroupCols: col1, 245 Aggregations: aggregations([]aggTestSpec{ 246 {fname: "ANY_NOT_NULL", colIdx: col1}, 247 {fname: "SUM", colIdx: col0}, 248 }), 249 }, 250 }, 251 }, 252 { 253 // SELECT count(@1), sum(@1), GROUP BY [] (empty group key). 254 Name: "CountSumGroupByNone", 255 Input: ProcessorTestCaseRows{ 256 Rows: [][]interface{}{ 257 {1, 2}, 258 {1, 4}, 259 {3, 2}, 260 {4, 2}, 261 {5, 4}, 262 }, 263 Types: sqlbase.MakeIntCols(2), 264 }, 265 Output: ProcessorTestCaseRows{ 266 Rows: [][]interface{}{ 267 {5, 14}, 268 }, 269 Types: []*types.T{types.Int, types.Decimal}, 270 }, 271 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 272 Aggregator: &execinfrapb.AggregatorSpec{ 273 Aggregations: aggregations([]aggTestSpec{ 274 {fname: "COUNT", colIdx: col0}, 275 {fname: "SUM", colIdx: col0}, 276 }), 277 }, 278 }, 279 }, 280 { 281 // SELECT SUM DISTINCT (@1), GROUP BY [] (empty group key). 282 Name: "SumdistinctGroupByNone", 283 Input: ProcessorTestCaseRows{ 284 Rows: [][]interface{}{ 285 {2}, 286 {4}, 287 {2}, 288 {2}, 289 {4}, 290 }, 291 Types: sqlbase.MakeIntCols(1), 292 }, 293 Output: ProcessorTestCaseRows{ 294 Rows: [][]interface{}{ 295 {6}, 296 }, 297 Types: sqlbase.MakeIntCols(1), 298 }, 299 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 300 Aggregator: &execinfrapb.AggregatorSpec{ 301 Aggregations: aggregations([]aggTestSpec{ 302 {fname: "SUM", distinct: true, colIdx: col0}, 303 }), 304 }, 305 }, 306 }, 307 { 308 // SELECT (@1), GROUP BY [] (empty group key). 309 Name: "GroupByNone", 310 Input: ProcessorTestCaseRows{ 311 Rows: [][]interface{}{ 312 {1}, 313 {1}, 314 {1}, 315 }, 316 Types: sqlbase.MakeIntCols(1), 317 }, 318 Output: ProcessorTestCaseRows{ 319 Rows: [][]interface{}{ 320 {1}, 321 }, 322 Types: sqlbase.MakeIntCols(1), 323 }, 324 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 325 Aggregator: &execinfrapb.AggregatorSpec{ 326 Aggregations: aggregations([]aggTestSpec{ 327 {fname: "ANY_NOT_NULL", colIdx: col0}, 328 }), 329 }, 330 }, 331 }, 332 { 333 Name: "MaxMinCountCountdistinctGroupByNone", 334 Input: ProcessorTestCaseRows{ 335 Rows: [][]interface{}{ 336 {2, 2}, 337 {1, 4}, 338 {3, 2}, 339 {4, 2}, 340 {5, 4}, 341 }, 342 Types: sqlbase.MakeIntCols(2), 343 }, 344 Output: ProcessorTestCaseRows{ 345 Rows: [][]interface{}{ 346 {5, 2, 5, 2}, 347 }, 348 Types: sqlbase.MakeIntCols(4), 349 }, 350 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 351 Aggregator: &execinfrapb.AggregatorSpec{ 352 Aggregations: aggregations([]aggTestSpec{ 353 {fname: "MAX", colIdx: col0}, 354 {fname: "MIN", colIdx: col1}, 355 {fname: "COUNT", colIdx: col1}, 356 {fname: "COUNT", distinct: true, colIdx: col1}, 357 }), 358 }, 359 }, 360 }, 361 { 362 Name: "MaxfilterCountfilterCountrowsfilter", 363 Input: ProcessorTestCaseRows{ 364 Rows: [][]interface{}{ 365 {1, true, 1, true}, 366 {5, false, 1, false}, 367 {2, true, 1, nil}, 368 {3, nil, 1, true}, 369 {2, true, 1, true}, 370 }, 371 Types: []*types.T{types.Int, types.Bool, types.Int, types.Bool}, 372 }, 373 Output: ProcessorTestCaseRows{ 374 Rows: [][]interface{}{ 375 {2, 3, 3}, 376 }, 377 Types: sqlbase.MakeIntCols(3), 378 }, 379 ProcessorCore: execinfrapb.ProcessorCoreUnion{ 380 Aggregator: &execinfrapb.AggregatorSpec{ 381 Aggregations: aggregations([]aggTestSpec{ 382 {fname: "MAX", colIdx: col0, filterColIdx: &filterCol1}, 383 {fname: "COUNT", colIdx: col2, filterColIdx: &filterCol3}, 384 {fname: "COUNT_ROWS", filterColIdx: &filterCol3}, 385 }), 386 }, 387 }, 388 }, 389 } 390 391 ctx := context.Background() 392 test := MakeProcessorTest(DefaultProcessorTestConfig()) 393 test.RunTestCases(ctx, t, testCases) 394 test.Close(ctx) 395 } 396 397 func BenchmarkAggregation(b *testing.B) { 398 const numCols = 1 399 const numRows = 1000 400 401 aggFuncs := []execinfrapb.AggregatorSpec_Func{ 402 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 403 execinfrapb.AggregatorSpec_AVG, 404 execinfrapb.AggregatorSpec_COUNT, 405 execinfrapb.AggregatorSpec_MAX, 406 execinfrapb.AggregatorSpec_MIN, 407 execinfrapb.AggregatorSpec_STDDEV, 408 execinfrapb.AggregatorSpec_SUM, 409 execinfrapb.AggregatorSpec_SUM_INT, 410 execinfrapb.AggregatorSpec_VARIANCE, 411 execinfrapb.AggregatorSpec_XOR_AGG, 412 } 413 414 ctx := context.Background() 415 st := cluster.MakeTestingClusterSettings() 416 evalCtx := tree.MakeTestingEvalContext(st) 417 defer evalCtx.Stop(ctx) 418 419 flowCtx := &execinfra.FlowCtx{ 420 Cfg: &execinfra.ServerConfig{Settings: st}, 421 EvalCtx: &evalCtx, 422 } 423 424 for _, aggFunc := range aggFuncs { 425 b.Run(aggFunc.String(), func(b *testing.B) { 426 spec := &execinfrapb.AggregatorSpec{ 427 Aggregations: []execinfrapb.AggregatorSpec_Aggregation{ 428 { 429 Func: aggFunc, 430 ColIdx: []uint32{0}, 431 }, 432 }, 433 } 434 post := &execinfrapb.PostProcessSpec{} 435 disposer := &rowDisposer{} 436 input := execinfra.NewRepeatableRowSource(sqlbase.OneIntCol, sqlbase.MakeIntRows(numRows, numCols)) 437 438 b.SetBytes(int64(8 * numRows * numCols)) 439 b.ResetTimer() 440 for i := 0; i < b.N; i++ { 441 d, err := newAggregator(flowCtx, 0 /* processorID */, spec, input, post, disposer) 442 if err != nil { 443 b.Fatal(err) 444 } 445 d.Run(context.Background()) 446 input.Reset() 447 } 448 b.StopTimer() 449 }) 450 } 451 } 452 453 func BenchmarkCountRows(b *testing.B) { 454 spec := &execinfrapb.AggregatorSpec{ 455 Aggregations: []execinfrapb.AggregatorSpec_Aggregation{ 456 { 457 Func: execinfrapb.AggregatorSpec_COUNT_ROWS, 458 }, 459 }, 460 } 461 post := &execinfrapb.PostProcessSpec{} 462 disposer := &rowDisposer{} 463 const numCols = 1 464 const numRows = 100000 465 input := execinfra.NewRepeatableRowSource(sqlbase.OneIntCol, sqlbase.MakeIntRows(numRows, numCols)) 466 467 ctx := context.Background() 468 st := cluster.MakeTestingClusterSettings() 469 evalCtx := tree.MakeTestingEvalContext(st) 470 defer evalCtx.Stop(ctx) 471 472 flowCtx := &execinfra.FlowCtx{ 473 Cfg: &execinfra.ServerConfig{Settings: st}, 474 EvalCtx: &evalCtx, 475 } 476 477 b.SetBytes(int64(8 * numRows * numCols)) 478 b.ResetTimer() 479 for i := 0; i < b.N; i++ { 480 d, err := newAggregator(flowCtx, 0 /* processorID */, spec, input, post, disposer) 481 if err != nil { 482 b.Fatal(err) 483 } 484 d.Run(context.Background()) 485 input.Reset() 486 } 487 } 488 489 func BenchmarkGrouping(b *testing.B) { 490 const numCols = 1 491 const numRows = 1000 492 493 ctx := context.Background() 494 st := cluster.MakeTestingClusterSettings() 495 evalCtx := tree.MakeTestingEvalContext(st) 496 defer evalCtx.Stop(ctx) 497 498 flowCtx := &execinfra.FlowCtx{ 499 Cfg: &execinfra.ServerConfig{Settings: st}, 500 EvalCtx: &evalCtx, 501 } 502 spec := &execinfrapb.AggregatorSpec{ 503 GroupCols: []uint32{0}, 504 } 505 post := &execinfrapb.PostProcessSpec{} 506 disposer := &rowDisposer{} 507 input := execinfra.NewRepeatableRowSource(sqlbase.OneIntCol, sqlbase.MakeIntRows(numRows, numCols)) 508 509 b.SetBytes(int64(8 * numRows * numCols)) 510 b.ResetTimer() 511 for i := 0; i < b.N; i++ { 512 d, err := newAggregator(flowCtx, 0 /* processorID */, spec, input, post, disposer) 513 if err != nil { 514 b.Fatal(err) 515 } 516 d.Run(context.Background()) 517 input.Reset() 518 } 519 b.StopTimer() 520 } 521 522 func benchmarkAggregationWithGrouping(b *testing.B, numOrderedCols int) { 523 const numCols = 3 524 const groupSize = 10 525 var groupedCols = [2]int{0, 1} 526 var allOrderedGroupCols = [2]uint32{0, 1} 527 528 aggFuncs := []execinfrapb.AggregatorSpec_Func{ 529 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 530 execinfrapb.AggregatorSpec_AVG, 531 execinfrapb.AggregatorSpec_COUNT, 532 execinfrapb.AggregatorSpec_MAX, 533 execinfrapb.AggregatorSpec_MIN, 534 execinfrapb.AggregatorSpec_STDDEV, 535 execinfrapb.AggregatorSpec_SUM, 536 execinfrapb.AggregatorSpec_SUM_INT, 537 execinfrapb.AggregatorSpec_VARIANCE, 538 execinfrapb.AggregatorSpec_XOR_AGG, 539 } 540 541 ctx := context.Background() 542 st := cluster.MakeTestingClusterSettings() 543 evalCtx := tree.MakeTestingEvalContext(st) 544 defer evalCtx.Stop(ctx) 545 546 flowCtx := &execinfra.FlowCtx{ 547 Cfg: &execinfra.ServerConfig{Settings: st}, 548 EvalCtx: &evalCtx, 549 } 550 551 for _, aggFunc := range aggFuncs { 552 b.Run(aggFunc.String(), func(b *testing.B) { 553 spec := &execinfrapb.AggregatorSpec{ 554 GroupCols: []uint32{0, 1}, 555 Aggregations: []execinfrapb.AggregatorSpec_Aggregation{ 556 { 557 Func: aggFunc, 558 ColIdx: []uint32{2}, 559 }, 560 }, 561 } 562 spec.OrderedGroupCols = allOrderedGroupCols[:numOrderedCols] 563 post := &execinfrapb.PostProcessSpec{} 564 disposer := &rowDisposer{} 565 input := execinfra.NewRepeatableRowSource(sqlbase.ThreeIntCols, makeGroupedIntRows(groupSize, numCols, groupedCols[:])) 566 567 b.SetBytes(int64(8 * intPow(groupSize, len(groupedCols)+1) * numCols)) 568 b.ResetTimer() 569 for i := 0; i < b.N; i++ { 570 d, err := newAggregator(flowCtx, 0 /* processorID */, spec, input, post, disposer) 571 if err != nil { 572 b.Fatal(err) 573 } 574 d.Run(context.Background()) 575 input.Reset() 576 } 577 b.StopTimer() 578 }) 579 } 580 } 581 582 func BenchmarkOrderedAggregation(b *testing.B) { 583 benchmarkAggregationWithGrouping(b, 2 /* numOrderedCols */) 584 } 585 586 func BenchmarkPartiallyOrderedAggregation(b *testing.B) { 587 benchmarkAggregationWithGrouping(b, 1 /* numOrderedCols */) 588 } 589 590 func BenchmarkUnorderedAggregation(b *testing.B) { 591 benchmarkAggregationWithGrouping(b, 0 /* numOrderedCols */) 592 } 593 594 func intPow(a, b int) int { 595 return int(math.Pow(float64(a), float64(b))) 596 } 597 598 // makeGroupedIntRows constructs a (groupSize**(len(groupedCols)+1)) x numCols 599 // table, where columns in groupedCols are sorted in ascending order with column 600 // priority defined by their position in groupedCols. If used in an aggregation 601 // where groupedCols are the GROUP BY columns, each group will have a size of 602 // groupSize. To make the input more interesting for aggregation, group columns 603 // are repeated. 604 // 605 // Examples: 606 // makeGroupedIntRows(2, 2, []int{1, 0}) -> 607 // [0 0] 608 // [0 0] 609 // [1 0] 610 // [1 0] 611 // [0 1] 612 // [0 1] 613 // [1 1] 614 // [1 1] 615 func makeGroupedIntRows(groupSize, numCols int, groupedCols []int) sqlbase.EncDatumRows { 616 numRows := intPow(groupSize, len(groupedCols)+1) 617 rows := make(sqlbase.EncDatumRows, numRows) 618 619 groupColSet := util.MakeFastIntSet(groupedCols...) 620 getGroupedColVal := func(rowIdx, colIdx int) int { 621 rank := -1 622 for i, c := range groupedCols { 623 if colIdx == c { 624 rank = len(groupedCols) - i 625 break 626 } 627 } 628 if rank == -1 { 629 panic("provided colIdx is not a group column") 630 } 631 return (rowIdx % intPow(groupSize, rank+1)) / intPow(groupSize, rank) 632 } 633 634 for i := range rows { 635 rows[i] = make(sqlbase.EncDatumRow, numCols) 636 for j := 0; j < numCols; j++ { 637 if groupColSet.Contains(j) { 638 rows[i][j] = sqlbase.DatumToEncDatum( 639 types.Int, tree.NewDInt(tree.DInt(getGroupedColVal(i, j)))) 640 } else { 641 rows[i][j] = sqlbase.DatumToEncDatum(types.Int, tree.NewDInt(tree.DInt(i+j))) 642 } 643 } 644 } 645 return rows 646 }