github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/aggregators_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package colexec 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 "testing" 18 19 "github.com/cockroachdb/apd" 20 "github.com/cockroachdb/cockroach/pkg/col/coldata" 21 "github.com/cockroachdb/cockroach/pkg/col/coldatatestutils" 22 "github.com/cockroachdb/cockroach/pkg/sql/colexecbase" 23 "github.com/cockroachdb/cockroach/pkg/sql/colmem" 24 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 25 "github.com/cockroachdb/cockroach/pkg/sql/types" 26 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 27 "github.com/cockroachdb/cockroach/pkg/util/randutil" 28 ) 29 30 var ( 31 defaultGroupCols = []uint32{0} 32 defaultAggCols = [][]uint32{{1}} 33 defaultAggFns = []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM} 34 defaultTyps = []*types.T{types.Int, types.Int} 35 ) 36 37 type aggregatorTestCase struct { 38 // typs, aggFns, groupCols, and aggCols will be set to their default 39 // values before running a test if nil. 40 typs []*types.T 41 aggFns []execinfrapb.AggregatorSpec_Func 42 groupCols []uint32 43 aggCols [][]uint32 44 input tuples 45 unorderedInput bool 46 expected tuples 47 // {output}BatchSize() if not 0 are passed in to NewOrderedAggregator to 48 // divide input/output batches. 49 batchSize int 50 outputBatchSize int 51 name string 52 53 // convToDecimal will convert any float64s to apd.Decimals. If a string is 54 // encountered, a best effort is made to convert that string to an 55 // apd.Decimal. 56 convToDecimal bool 57 } 58 59 // aggType is a helper struct that allows tests to test both the ordered and 60 // hash aggregators at the same time. 61 type aggType struct { 62 new func( 63 allocator *colmem.Allocator, 64 input colexecbase.Operator, 65 typs []*types.T, 66 aggFns []execinfrapb.AggregatorSpec_Func, 67 groupCols []uint32, 68 aggCols [][]uint32, 69 isScalar bool, 70 ) (colexecbase.Operator, error) 71 name string 72 } 73 74 var aggTypes = []aggType{ 75 { 76 // This is a wrapper around NewHashAggregator so its signature is compatible 77 // with orderedAggregator. 78 new: func( 79 allocator *colmem.Allocator, 80 input colexecbase.Operator, 81 typs []*types.T, 82 aggFns []execinfrapb.AggregatorSpec_Func, 83 groupCols []uint32, 84 aggCols [][]uint32, 85 _ bool, 86 ) (colexecbase.Operator, error) { 87 return NewHashAggregator( 88 allocator, input, typs, aggFns, groupCols, aggCols) 89 }, 90 name: "hash", 91 }, 92 { 93 new: NewOrderedAggregator, 94 name: "ordered", 95 }, 96 } 97 98 func (tc *aggregatorTestCase) init() error { 99 if tc.convToDecimal { 100 for _, tuples := range []tuples{tc.input, tc.expected} { 101 for _, tuple := range tuples { 102 for i, e := range tuple { 103 switch v := e.(type) { 104 case float64: 105 d := &apd.Decimal{} 106 d, err := d.SetFloat64(v) 107 if err != nil { 108 return err 109 } 110 tuple[i] = *d 111 case string: 112 d := &apd.Decimal{} 113 d, _, err := d.SetString(v) 114 if err != nil { 115 // If there was an error converting the string to decimal, just 116 // leave the datum as is. 117 continue 118 } 119 tuple[i] = *d 120 } 121 } 122 } 123 } 124 } 125 if tc.groupCols == nil { 126 tc.groupCols = defaultGroupCols 127 } 128 if tc.aggFns == nil { 129 tc.aggFns = defaultAggFns 130 } 131 if tc.aggCols == nil { 132 tc.aggCols = defaultAggCols 133 } 134 if tc.typs == nil { 135 tc.typs = defaultTyps 136 } 137 if tc.batchSize == 0 { 138 tc.batchSize = coldata.BatchSize() 139 } 140 if tc.outputBatchSize == 0 { 141 tc.outputBatchSize = coldata.BatchSize() 142 } 143 return nil 144 } 145 146 func TestAggregatorOneFunc(t *testing.T) { 147 defer leaktest.AfterTest(t)() 148 testCases := []aggregatorTestCase{ 149 { 150 input: tuples{ 151 {0, 1}, 152 }, 153 expected: tuples{ 154 {1}, 155 }, 156 name: "OneTuple", 157 outputBatchSize: 4, 158 }, 159 { 160 input: tuples{ 161 {0, 1}, 162 {0, 1}, 163 }, 164 expected: tuples{ 165 {2}, 166 }, 167 name: "OneGroup", 168 }, 169 { 170 input: tuples{ 171 {0, 1}, 172 {0, 0}, 173 {0, 1}, 174 {1, 4}, 175 {2, 5}, 176 }, 177 expected: tuples{ 178 {2}, 179 {4}, 180 {5}, 181 }, 182 batchSize: 2, 183 name: "MultiGroup", 184 }, 185 { 186 input: tuples{ 187 {0, 1}, 188 {0, 2}, 189 {0, 3}, 190 {1, 4}, 191 {1, 5}, 192 }, 193 expected: tuples{ 194 {6}, 195 {9}, 196 }, 197 batchSize: 1, 198 name: "CarryBetweenInputBatches", 199 }, 200 { 201 input: tuples{ 202 {0, 1}, 203 {0, 2}, 204 {0, 3}, 205 {0, 4}, 206 {1, 5}, 207 {2, 6}, 208 }, 209 expected: tuples{ 210 {10}, 211 {5}, 212 {6}, 213 }, 214 batchSize: 2, 215 outputBatchSize: 1, 216 name: "CarryBetweenOutputBatches", 217 }, 218 { 219 input: tuples{ 220 {0, 1}, 221 {0, 1}, 222 {1, 2}, 223 {2, 3}, 224 {2, 3}, 225 {3, 4}, 226 {3, 4}, 227 {4, 5}, 228 {5, 6}, 229 {6, 7}, 230 {7, 8}, 231 }, 232 expected: tuples{ 233 {2}, 234 {2}, 235 {6}, 236 {8}, 237 {5}, 238 {6}, 239 {7}, 240 {8}, 241 }, 242 batchSize: 3, 243 outputBatchSize: 1, 244 name: "CarryBetweenInputAndOutputBatches", 245 }, 246 { 247 input: tuples{ 248 {0, 1}, 249 {0, 2}, 250 {0, 3}, 251 {0, 4}, 252 }, 253 expected: tuples{ 254 {10}, 255 }, 256 batchSize: 1, 257 outputBatchSize: 1, 258 name: "NoGroupingCols", 259 groupCols: []uint32{}, 260 }, 261 { 262 input: tuples{ 263 {1, 0, 0}, 264 {2, 0, 0}, 265 {3, 0, 0}, 266 {4, 0, 0}, 267 }, 268 expected: tuples{ 269 {10}, 270 }, 271 batchSize: 1, 272 outputBatchSize: 1, 273 name: "UnusedInputColumns", 274 typs: []*types.T{types.Int, types.Int, types.Int}, 275 groupCols: []uint32{1, 2}, 276 aggCols: [][]uint32{{0}}, 277 }, 278 { 279 input: tuples{ 280 {nil, 1}, 281 {4, 42}, 282 {nil, 2}, 283 }, 284 expected: tuples{ 285 {3}, 286 {42}, 287 }, 288 name: "UnorderedWithNullsInGroupingCol", 289 unorderedInput: true, 290 }, 291 } 292 293 // Run tests with deliberate batch sizes and no selection vectors. 294 for _, tc := range testCases { 295 t.Run(tc.name, func(t *testing.T) { 296 if err := tc.init(); err != nil { 297 t.Fatal(err) 298 } 299 300 if !tc.unorderedInput { 301 tupleSource := newOpTestInput(tc.batchSize, tc.input, nil /* typs */) 302 a, err := NewOrderedAggregator( 303 testAllocator, 304 tupleSource, 305 tc.typs, 306 tc.aggFns, 307 tc.groupCols, 308 tc.aggCols, 309 false, /* isScalar */ 310 ) 311 if err != nil { 312 t.Fatal(err) 313 } 314 315 out := newOpTestOutput(a, tc.expected) 316 // Explicitly reinitialize the aggregator with the given output batch 317 // size. 318 a.(*orderedAggregator).initWithInputAndOutputBatchSize(tc.batchSize, tc.outputBatchSize) 319 if err := out.VerifyAnyOrder(); err != nil { 320 t.Fatal(err) 321 } 322 } 323 324 // Run randomized tests on this test case. 325 t.Run(fmt.Sprintf("Randomized"), func(t *testing.T) { 326 for _, agg := range aggTypes { 327 if tc.unorderedInput && agg.name == "ordered" { 328 // This test case has unordered input, so we skip ordered 329 // aggregator. 330 continue 331 } 332 t.Run(agg.name, func(t *testing.T) { 333 runTests(t, []tuples{tc.input}, tc.expected, unorderedVerifier, 334 func(input []colexecbase.Operator) (colexecbase.Operator, error) { 335 return agg.new( 336 testAllocator, 337 input[0], 338 tc.typs, 339 tc.aggFns, 340 tc.groupCols, 341 tc.aggCols, 342 false, /* isScalar */ 343 ) 344 }) 345 }) 346 } 347 }) 348 }) 349 } 350 } 351 352 func TestAggregatorMultiFunc(t *testing.T) { 353 defer leaktest.AfterTest(t)() 354 testCases := []aggregatorTestCase{ 355 { 356 aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM}, 357 aggCols: [][]uint32{ 358 {2}, {1}, 359 }, 360 input: tuples{ 361 {0, 1, 2}, 362 {0, 1, 2}, 363 }, 364 typs: []*types.T{types.Int, types.Int, types.Int}, 365 expected: tuples{ 366 {4, 2}, 367 }, 368 name: "OutputOrder", 369 }, 370 { 371 aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM}, 372 aggCols: [][]uint32{ 373 {2}, {1}, 374 }, 375 input: tuples{ 376 {0, 1, 1.3}, 377 {0, 1, 1.6}, 378 {0, 1, 0.5}, 379 {1, 1, 1.2}, 380 }, 381 typs: []*types.T{types.Int, types.Int, types.Decimal}, 382 expected: tuples{ 383 {3.4, 3}, 384 {1.2, 1}, 385 }, 386 name: "SumMultiType", 387 convToDecimal: true, 388 }, 389 { 390 aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_AVG, execinfrapb.AggregatorSpec_SUM}, 391 aggCols: [][]uint32{ 392 {1}, {1}, 393 }, 394 input: tuples{ 395 {0, 1.1}, 396 {0, 1.2}, 397 {0, 2.3}, 398 {1, 6.21}, 399 {1, 2.43}, 400 }, 401 typs: []*types.T{types.Int, types.Decimal}, 402 expected: tuples{ 403 {"1.5333333333333333333", 4.6}, 404 {4.32, 8.64}, 405 }, 406 name: "AvgSumSingleInputBatch", 407 convToDecimal: true, 408 }, 409 { 410 aggFns: []execinfrapb.AggregatorSpec_Func{ 411 execinfrapb.AggregatorSpec_BOOL_AND, 412 execinfrapb.AggregatorSpec_BOOL_OR, 413 }, 414 aggCols: [][]uint32{ 415 {1}, {1}, 416 }, 417 input: tuples{ 418 {0, true}, 419 {1, false}, 420 {2, true}, 421 {2, false}, 422 {3, true}, 423 {3, true}, 424 {4, false}, 425 {4, false}, 426 {5, false}, 427 {5, nil}, 428 {6, nil}, 429 {6, true}, 430 {7, nil}, 431 {7, false}, 432 {7, true}, 433 {8, nil}, 434 {8, nil}, 435 }, 436 typs: []*types.T{types.Int, types.Bool}, 437 expected: tuples{ 438 {true, true}, 439 {false, false}, 440 {false, true}, 441 {true, true}, 442 {false, false}, 443 {false, false}, 444 {true, true}, 445 {false, true}, 446 {nil, nil}, 447 }, 448 name: "BoolAndOrBatch", 449 }, 450 { 451 aggFns: []execinfrapb.AggregatorSpec_Func{ 452 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 453 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 454 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 455 execinfrapb.AggregatorSpec_MIN, 456 execinfrapb.AggregatorSpec_SUM_INT, 457 }, 458 input: tuples{ 459 {2, 1.0, "1.0", 2.0}, 460 {2, 1.0, "1.0", 4.0}, 461 {2, 2.0, "2.0", 6.0}, 462 }, 463 expected: tuples{ 464 {2, 1.0, "1.0", 2.0, 6.0}, 465 {2, 2.0, "2.0", 6.0, 6.0}, 466 }, 467 batchSize: 1, 468 typs: []*types.T{types.Int, types.Decimal, types.Bytes, types.Decimal}, 469 name: "MultiGroupColsWithPointerTypes", 470 groupCols: []uint32{0, 1, 2}, 471 aggCols: [][]uint32{ 472 {0}, {1}, {2}, {3}, {3}, 473 }, 474 }, 475 { 476 aggFns: []execinfrapb.AggregatorSpec_Func{ 477 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 478 execinfrapb.AggregatorSpec_SUM_INT, 479 }, 480 input: tuples{ 481 {`{"id": null}`, -1}, 482 {`{"id": 0, "data": "s1"}`, 1}, 483 {`{"id": 0, "data": "s1"}`, 2}, 484 {`{"id": 1, "data": "s2"}`, 10}, 485 {`{"id": 1, "data": "s2"}`, 11}, 486 {`{"id": 2, "data": "s3"}`, 100}, 487 {`{"id": 2, "data": "s3"}`, 101}, 488 {`{"id": 2, "data": "s4"}`, 102}, 489 }, 490 expected: tuples{ 491 {`{"id": null}`, -1}, 492 {`{"id": 0, "data": "s1"}`, 3}, 493 {`{"id": 1, "data": "s2"}`, 21}, 494 {`{"id": 2, "data": "s3"}`, 201}, 495 {`{"id": 2, "data": "s4"}`, 102}, 496 }, 497 typs: []*types.T{types.Jsonb, types.Int}, 498 name: "GroupOnJsonColumns", 499 groupCols: []uint32{0}, 500 aggCols: [][]uint32{ 501 {0}, {1}, 502 }, 503 }, 504 } 505 506 for _, agg := range aggTypes { 507 for _, tc := range testCases { 508 t.Run(fmt.Sprintf("%s/%s/Randomized", agg.name, tc.name), func(t *testing.T) { 509 if err := tc.init(); err != nil { 510 t.Fatal(err) 511 } 512 runTestsWithTyps(t, []tuples{tc.input}, [][]*types.T{tc.typs}, tc.expected, unorderedVerifier, 513 func(input []colexecbase.Operator) (colexecbase.Operator, error) { 514 return agg.new(testAllocator, input[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols, false /* isScalar */) 515 }) 516 }) 517 } 518 } 519 } 520 521 func TestAggregatorAllFunctions(t *testing.T) { 522 defer leaktest.AfterTest(t)() 523 testCases := []aggregatorTestCase{ 524 { 525 aggFns: []execinfrapb.AggregatorSpec_Func{ 526 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 527 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 528 execinfrapb.AggregatorSpec_AVG, 529 execinfrapb.AggregatorSpec_COUNT_ROWS, 530 execinfrapb.AggregatorSpec_COUNT, 531 execinfrapb.AggregatorSpec_SUM, 532 execinfrapb.AggregatorSpec_MIN, 533 execinfrapb.AggregatorSpec_MAX, 534 execinfrapb.AggregatorSpec_BOOL_AND, 535 execinfrapb.AggregatorSpec_BOOL_OR, 536 }, 537 aggCols: [][]uint32{{0}, {4}, {1}, {}, {1}, {2}, {2}, {2}, {3}, {3}}, 538 typs: []*types.T{types.Int, types.Decimal, types.Int, types.Bool, types.Bytes}, 539 input: tuples{ 540 {0, 3.1, 2, true, "zero"}, 541 {0, 1.1, 3, false, "zero"}, 542 {1, 1.1, 1, false, "one"}, 543 {1, 4.1, 0, false, "one"}, 544 {2, 1.1, 1, true, "two"}, 545 {3, 4.1, 0, false, "three"}, 546 {3, 5.1, 0, true, "three"}, 547 }, 548 expected: tuples{ 549 {0, "zero", 2.1, 2, 2, 5, 2, 3, false, true}, 550 {1, "one", 2.6, 2, 2, 1, 0, 1, false, false}, 551 {2, "two", 1.1, 1, 1, 1, 1, 1, true, true}, 552 {3, "three", 4.6, 2, 2, 0, 0, 0, false, true}, 553 }, 554 convToDecimal: true, 555 }, 556 557 // Test case for null handling. 558 { 559 aggFns: []execinfrapb.AggregatorSpec_Func{ 560 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 561 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 562 execinfrapb.AggregatorSpec_COUNT_ROWS, 563 execinfrapb.AggregatorSpec_COUNT, 564 execinfrapb.AggregatorSpec_SUM, 565 execinfrapb.AggregatorSpec_SUM_INT, 566 execinfrapb.AggregatorSpec_MIN, 567 execinfrapb.AggregatorSpec_MAX, 568 execinfrapb.AggregatorSpec_AVG, 569 execinfrapb.AggregatorSpec_BOOL_AND, 570 execinfrapb.AggregatorSpec_BOOL_OR, 571 }, 572 aggCols: [][]uint32{{0}, {1}, {}, {1}, {1}, {2}, {2}, {2}, {1}, {3}, {3}}, 573 typs: []*types.T{types.Int, types.Decimal, types.Int, types.Bool}, 574 input: tuples{ 575 {nil, 1.1, 4, true}, 576 {0, nil, nil, nil}, 577 {0, 3.1, 5, nil}, 578 {1, nil, nil, nil}, 579 {1, nil, nil, false}, 580 }, 581 expected: tuples{ 582 {nil, 1.1, 1, 1, 1.1, 4, 4, 4, 1.1, true, true}, 583 {0, 3.1, 2, 1, 3.1, 5, 5, 5, 3.1, nil, nil}, 584 {1, nil, 2, 0, nil, nil, nil, nil, nil, false, false}, 585 }, 586 convToDecimal: true, 587 }, 588 } 589 590 for _, agg := range aggTypes { 591 for i, tc := range testCases { 592 t.Run(fmt.Sprintf("%s/%d", agg.name, i), func(t *testing.T) { 593 if err := tc.init(); err != nil { 594 t.Fatal(err) 595 } 596 verifier := orderedVerifier 597 if strings.Contains(agg.name, "hash") { 598 verifier = unorderedVerifier 599 } 600 runTests( 601 t, 602 []tuples{tc.input}, 603 tc.expected, 604 verifier, 605 func(input []colexecbase.Operator) (colexecbase.Operator, error) { 606 return agg.new(testAllocator, input[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols, false /* isScalar */) 607 }) 608 }) 609 } 610 } 611 } 612 613 func TestAggregatorRandom(t *testing.T) { 614 defer leaktest.AfterTest(t)() 615 616 // This test aggregates random inputs, keeping track of the expected results 617 // to make sure the aggregations are correct. 618 rng, _ := randutil.NewPseudoRand() 619 for _, groupSize := range []int{1, 2, coldata.BatchSize() / 4, coldata.BatchSize() / 2} { 620 if groupSize == 0 { 621 // We might be varying coldata.BatchSize() so that when it is divided by 622 // 4, groupSize is 0. We want to skip such configuration. 623 continue 624 } 625 for _, numInputBatches := range []int{1, 2, 64} { 626 for _, hasNulls := range []bool{true, false} { 627 for _, agg := range aggTypes { 628 t.Run(fmt.Sprintf("%s/groupSize=%d/numInputBatches=%d/hasNulls=%t", agg.name, groupSize, numInputBatches, hasNulls), 629 func(t *testing.T) { 630 nTuples := coldata.BatchSize() * numInputBatches 631 typs := []*types.T{types.Int, types.Float} 632 cols := []coldata.Vec{ 633 testAllocator.NewMemColumn(typs[0], nTuples), 634 testAllocator.NewMemColumn(typs[1], nTuples), 635 } 636 groups, aggCol, aggColNulls := cols[0].Int64(), cols[1].Float64(), cols[1].Nulls() 637 expectedTuples := tuples{} 638 639 var expRowCounts, expCounts []int64 640 var expSums, expMins, expMaxs []float64 641 // SUM, MIN, MAX, and AVG aggregators can output null. 642 var expNulls []bool 643 curGroup := -1 644 for i := range groups { 645 if i%groupSize == 0 { 646 if curGroup != -1 { 647 if expNulls[curGroup] { 648 expectedTuples = append(expectedTuples, tuple{ 649 expRowCounts[curGroup], expCounts[curGroup], nil, nil, nil, nil, 650 }) 651 } else { 652 expectedTuples = append(expectedTuples, tuple{ 653 expRowCounts[curGroup], expCounts[curGroup], expSums[curGroup], expMins[curGroup], expMaxs[curGroup], expSums[curGroup] / float64(expCounts[curGroup]), 654 }) 655 } 656 } 657 expRowCounts = append(expRowCounts, 0) 658 expCounts = append(expCounts, 0) 659 expSums = append(expSums, 0) 660 expMins = append(expMins, 2048) 661 expMaxs = append(expMaxs, -2048) 662 expNulls = append(expNulls, true) 663 curGroup++ 664 } 665 // Keep the inputs small so they are a realistic size. Using a 666 // large range is not realistic and makes decimal operations 667 // slower. 668 aggCol[i] = 2048 * (rng.Float64() - 0.5) 669 670 // NULL values contribute to the row count, so we're updating 671 // the row counts outside of the if block. 672 expRowCounts[curGroup]++ 673 if hasNulls && rng.Float64() < nullProbability { 674 aggColNulls.SetNull(i) 675 } else { 676 expNulls[curGroup] = false 677 expCounts[curGroup]++ 678 expSums[curGroup] += aggCol[i] 679 expMins[curGroup] = min64(aggCol[i], expMins[curGroup]) 680 expMaxs[curGroup] = max64(aggCol[i], expMaxs[curGroup]) 681 } 682 groups[i] = int64(curGroup) 683 } 684 // Add result for last group. 685 if expNulls[curGroup] { 686 expectedTuples = append(expectedTuples, tuple{ 687 expRowCounts[curGroup], expCounts[curGroup], nil, nil, nil, nil, 688 }) 689 } else { 690 expectedTuples = append(expectedTuples, tuple{ 691 expRowCounts[curGroup], expCounts[curGroup], expSums[curGroup], expMins[curGroup], expMaxs[curGroup], expSums[curGroup] / float64(expCounts[curGroup]), 692 }) 693 } 694 695 source := newChunkingBatchSource(typs, cols, nTuples) 696 a, err := agg.new( 697 testAllocator, 698 source, 699 typs, 700 []execinfrapb.AggregatorSpec_Func{ 701 execinfrapb.AggregatorSpec_COUNT_ROWS, 702 execinfrapb.AggregatorSpec_COUNT, 703 execinfrapb.AggregatorSpec_SUM_INT, 704 execinfrapb.AggregatorSpec_MIN, 705 execinfrapb.AggregatorSpec_MAX, 706 execinfrapb.AggregatorSpec_AVG}, 707 []uint32{0}, 708 [][]uint32{{}, {1}, {1}, {1}, {1}, {1}}, 709 false, /* isScalar */ 710 ) 711 if err != nil { 712 t.Fatal(err) 713 } 714 a.Init() 715 716 testOutput := newOpTestOutput(a, expectedTuples) 717 if strings.Contains(agg.name, "hash") { 718 err = testOutput.VerifyAnyOrder() 719 } else { 720 err = testOutput.Verify() 721 } 722 723 if err != nil { 724 t.Fatal(err) 725 } 726 }) 727 } 728 } 729 } 730 } 731 } 732 733 func BenchmarkAggregator(b *testing.B) { 734 rng, _ := randutil.NewPseudoRand() 735 ctx := context.Background() 736 737 const bytesFixedLength = 8 738 for _, aggFn := range []execinfrapb.AggregatorSpec_Func{ 739 execinfrapb.AggregatorSpec_ANY_NOT_NULL, 740 execinfrapb.AggregatorSpec_AVG, 741 execinfrapb.AggregatorSpec_COUNT_ROWS, 742 execinfrapb.AggregatorSpec_COUNT, 743 execinfrapb.AggregatorSpec_SUM, 744 execinfrapb.AggregatorSpec_MIN, 745 execinfrapb.AggregatorSpec_MAX, 746 execinfrapb.AggregatorSpec_BOOL_AND, 747 execinfrapb.AggregatorSpec_BOOL_OR, 748 } { 749 fName := execinfrapb.AggregatorSpec_Func_name[int32(aggFn)] 750 b.Run(fName, func(b *testing.B) { 751 for _, agg := range aggTypes { 752 for typIdx, typ := range []*types.T{types.Int, types.Decimal, types.Bytes} { 753 for _, groupSize := range []int{1, 2, coldata.BatchSize() / 2, coldata.BatchSize()} { 754 for _, hasNulls := range []bool{false, true} { 755 for _, numInputBatches := range []int{64} { 756 if aggFn == execinfrapb.AggregatorSpec_BOOL_AND || aggFn == execinfrapb.AggregatorSpec_BOOL_OR { 757 typ = types.Bool 758 if typIdx > 0 { 759 // We don't need to run the benchmark of bool_and and 760 // bool_or multiple times, so we skip all runs except 761 // for the first one. 762 continue 763 } 764 } 765 b.Run(fmt.Sprintf("%s/%s/groupSize=%d/hasNulls=%t/numInputBatches=%d", agg.name, typ.String(), 766 groupSize, hasNulls, numInputBatches), 767 func(b *testing.B) { 768 typs := []*types.T{types.Int, typ} 769 nTuples := numInputBatches * coldata.BatchSize() 770 cols := []coldata.Vec{ 771 testAllocator.NewMemColumn(types.Int, nTuples), 772 testAllocator.NewMemColumn(typ, nTuples), 773 } 774 groups := cols[0].Int64() 775 curGroup := -1 776 for i := 0; i < nTuples; i++ { 777 if groupSize == 1 || i%groupSize == 0 { 778 curGroup++ 779 } 780 groups[i] = int64(curGroup) 781 } 782 nullProb := 0.0 783 if hasNulls { 784 nullProb = nullProbability 785 } 786 coldatatestutils.RandomVec(coldatatestutils.RandomVecArgs{ 787 Rand: rng, 788 Vec: cols[1], 789 N: nTuples, 790 NullProbability: nullProb, 791 BytesFixedLength: bytesFixedLength, 792 }) 793 if typ.Identical(types.Int) && aggFn == execinfrapb.AggregatorSpec_SUM { 794 // Summation of random Int64 values can lead to 795 // overflow, and we will panic. To go around it, we 796 // restrict the range of values. 797 vals := cols[1].Int64() 798 for i := range vals { 799 vals[i] = vals[i] % 1024 800 } 801 } 802 source := newChunkingBatchSource(typs, cols, nTuples) 803 804 nCols := 1 805 if aggFn == execinfrapb.AggregatorSpec_COUNT_ROWS { 806 nCols = 0 807 } 808 a, err := agg.new( 809 testAllocator, 810 source, 811 typs, 812 []execinfrapb.AggregatorSpec_Func{aggFn}, 813 []uint32{0}, 814 [][]uint32{[]uint32{1}[:nCols]}, 815 false, /* isScalar */ 816 ) 817 if err != nil { 818 b.Skip() 819 } 820 a.Init() 821 822 b.ResetTimer() 823 824 // Only count the int64 column. 825 b.SetBytes(int64(8 * nTuples)) 826 for i := 0; i < b.N; i++ { 827 a.(resetter).reset(ctx) 828 source.reset() 829 // Exhaust aggregator until all batches have been read. 830 for b := a.Next(ctx); b.Length() != 0; b = a.Next(ctx) { 831 } 832 } 833 }, 834 ) 835 } 836 } 837 } 838 } 839 } 840 }) 841 } 842 } 843 844 func TestHashAggregator(t *testing.T) { 845 defer leaktest.AfterTest(t)() 846 tcs := []aggregatorTestCase{ 847 { 848 // Test carry between output batches. 849 input: tuples{ 850 {0, 1}, 851 {1, 5}, 852 {0, 4}, 853 {0, 2}, 854 {2, 6}, 855 {0, 3}, 856 {0, 7}, 857 }, 858 typs: []*types.T{types.Int, types.Int}, 859 groupCols: []uint32{0}, 860 aggCols: [][]uint32{{1}}, 861 862 expected: tuples{ 863 {5}, 864 {6}, 865 {17}, 866 }, 867 868 name: "carryBetweenBatches", 869 }, 870 { 871 // Test a single row input source. 872 input: tuples{ 873 {5}, 874 }, 875 typs: []*types.T{types.Int}, 876 groupCols: []uint32{0}, 877 aggCols: [][]uint32{{0}}, 878 879 expected: tuples{ 880 {5}, 881 }, 882 883 name: "singleRowInput", 884 }, 885 { 886 // Test bucket collisions. 887 input: tuples{ 888 {0, 3}, 889 {0, 4}, 890 {hashTableNumBuckets, 6}, 891 {0, 5}, 892 {hashTableNumBuckets, 7}, 893 }, 894 typs: []*types.T{types.Int, types.Int}, 895 groupCols: []uint32{0}, 896 aggCols: [][]uint32{{1}}, 897 898 expected: tuples{ 899 {12}, 900 {13}, 901 }, 902 903 name: "bucketCollision", 904 }, 905 { 906 input: tuples{ 907 {0, 1, 1.3}, 908 {0, 1, 1.6}, 909 {0, 1, 0.5}, 910 {1, 1, 1.2}, 911 }, 912 typs: []*types.T{types.Int, types.Int, types.Decimal}, 913 convToDecimal: true, 914 915 aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM}, 916 groupCols: []uint32{0, 1}, 917 aggCols: [][]uint32{ 918 {2}, {1}, 919 }, 920 921 expected: tuples{ 922 {3.4, 3}, 923 {1.2, 1}, 924 }, 925 926 name: "decimalSums", 927 }, 928 { 929 // Test unused input columns. 930 input: tuples{ 931 {0, 1, 2, 3}, 932 {0, 1, 4, 5}, 933 {1, 1, 3, 7}, 934 {1, 2, 4, 9}, 935 {0, 1, 6, 11}, 936 {1, 2, 6, 13}, 937 }, 938 typs: []*types.T{types.Int, types.Int, types.Int, types.Int}, 939 groupCols: []uint32{0, 1}, 940 aggCols: [][]uint32{{3}}, 941 942 expected: tuples{ 943 {7}, 944 {19}, 945 {22}, 946 }, 947 948 name: "unusedInputCol", 949 }, 950 } 951 952 for _, numOfHashBuckets := range []int{0 /* no limit */, 1, coldata.BatchSize()} { 953 for _, tc := range tcs { 954 if err := tc.init(); err != nil { 955 t.Fatal(err) 956 } 957 t.Run(fmt.Sprintf("numOfHashBuckets=%d", numOfHashBuckets), func(t *testing.T) { 958 runTests(t, []tuples{tc.input}, tc.expected, unorderedVerifier, func(sources []colexecbase.Operator) (colexecbase.Operator, error) { 959 a, err := NewHashAggregator(testAllocator, sources[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols) 960 a.(*hashAggregator).testingKnobs.numOfHashBuckets = uint64(numOfHashBuckets) 961 return a, err 962 }) 963 }) 964 } 965 } 966 } 967 968 func min64(a, b float64) float64 { 969 if a < b { 970 return a 971 } 972 return b 973 } 974 975 func max64(a, b float64) float64 { 976 if a > b { 977 return a 978 } 979 return b 980 }