github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/aggregators_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package colexec
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/apd"
    20  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    21  	"github.com/cockroachdb/cockroach/pkg/col/coldatatestutils"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    26  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    27  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    28  )
    29  
    30  var (
    31  	defaultGroupCols = []uint32{0}
    32  	defaultAggCols   = [][]uint32{{1}}
    33  	defaultAggFns    = []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM}
    34  	defaultTyps      = []*types.T{types.Int, types.Int}
    35  )
    36  
    37  type aggregatorTestCase struct {
    38  	// typs, aggFns, groupCols, and aggCols will be set to their default
    39  	// values before running a test if nil.
    40  	typs           []*types.T
    41  	aggFns         []execinfrapb.AggregatorSpec_Func
    42  	groupCols      []uint32
    43  	aggCols        [][]uint32
    44  	input          tuples
    45  	unorderedInput bool
    46  	expected       tuples
    47  	// {output}BatchSize() if not 0 are passed in to NewOrderedAggregator to
    48  	// divide input/output batches.
    49  	batchSize       int
    50  	outputBatchSize int
    51  	name            string
    52  
    53  	// convToDecimal will convert any float64s to apd.Decimals. If a string is
    54  	// encountered, a best effort is made to convert that string to an
    55  	// apd.Decimal.
    56  	convToDecimal bool
    57  }
    58  
    59  // aggType is a helper struct that allows tests to test both the ordered and
    60  // hash aggregators at the same time.
    61  type aggType struct {
    62  	new func(
    63  		allocator *colmem.Allocator,
    64  		input colexecbase.Operator,
    65  		typs []*types.T,
    66  		aggFns []execinfrapb.AggregatorSpec_Func,
    67  		groupCols []uint32,
    68  		aggCols [][]uint32,
    69  		isScalar bool,
    70  	) (colexecbase.Operator, error)
    71  	name string
    72  }
    73  
    74  var aggTypes = []aggType{
    75  	{
    76  		// This is a wrapper around NewHashAggregator so its signature is compatible
    77  		// with orderedAggregator.
    78  		new: func(
    79  			allocator *colmem.Allocator,
    80  			input colexecbase.Operator,
    81  			typs []*types.T,
    82  			aggFns []execinfrapb.AggregatorSpec_Func,
    83  			groupCols []uint32,
    84  			aggCols [][]uint32,
    85  			_ bool,
    86  		) (colexecbase.Operator, error) {
    87  			return NewHashAggregator(
    88  				allocator, input, typs, aggFns, groupCols, aggCols)
    89  		},
    90  		name: "hash",
    91  	},
    92  	{
    93  		new:  NewOrderedAggregator,
    94  		name: "ordered",
    95  	},
    96  }
    97  
    98  func (tc *aggregatorTestCase) init() error {
    99  	if tc.convToDecimal {
   100  		for _, tuples := range []tuples{tc.input, tc.expected} {
   101  			for _, tuple := range tuples {
   102  				for i, e := range tuple {
   103  					switch v := e.(type) {
   104  					case float64:
   105  						d := &apd.Decimal{}
   106  						d, err := d.SetFloat64(v)
   107  						if err != nil {
   108  							return err
   109  						}
   110  						tuple[i] = *d
   111  					case string:
   112  						d := &apd.Decimal{}
   113  						d, _, err := d.SetString(v)
   114  						if err != nil {
   115  							// If there was an error converting the string to decimal, just
   116  							// leave the datum as is.
   117  							continue
   118  						}
   119  						tuple[i] = *d
   120  					}
   121  				}
   122  			}
   123  		}
   124  	}
   125  	if tc.groupCols == nil {
   126  		tc.groupCols = defaultGroupCols
   127  	}
   128  	if tc.aggFns == nil {
   129  		tc.aggFns = defaultAggFns
   130  	}
   131  	if tc.aggCols == nil {
   132  		tc.aggCols = defaultAggCols
   133  	}
   134  	if tc.typs == nil {
   135  		tc.typs = defaultTyps
   136  	}
   137  	if tc.batchSize == 0 {
   138  		tc.batchSize = coldata.BatchSize()
   139  	}
   140  	if tc.outputBatchSize == 0 {
   141  		tc.outputBatchSize = coldata.BatchSize()
   142  	}
   143  	return nil
   144  }
   145  
   146  func TestAggregatorOneFunc(t *testing.T) {
   147  	defer leaktest.AfterTest(t)()
   148  	testCases := []aggregatorTestCase{
   149  		{
   150  			input: tuples{
   151  				{0, 1},
   152  			},
   153  			expected: tuples{
   154  				{1},
   155  			},
   156  			name:            "OneTuple",
   157  			outputBatchSize: 4,
   158  		},
   159  		{
   160  			input: tuples{
   161  				{0, 1},
   162  				{0, 1},
   163  			},
   164  			expected: tuples{
   165  				{2},
   166  			},
   167  			name: "OneGroup",
   168  		},
   169  		{
   170  			input: tuples{
   171  				{0, 1},
   172  				{0, 0},
   173  				{0, 1},
   174  				{1, 4},
   175  				{2, 5},
   176  			},
   177  			expected: tuples{
   178  				{2},
   179  				{4},
   180  				{5},
   181  			},
   182  			batchSize: 2,
   183  			name:      "MultiGroup",
   184  		},
   185  		{
   186  			input: tuples{
   187  				{0, 1},
   188  				{0, 2},
   189  				{0, 3},
   190  				{1, 4},
   191  				{1, 5},
   192  			},
   193  			expected: tuples{
   194  				{6},
   195  				{9},
   196  			},
   197  			batchSize: 1,
   198  			name:      "CarryBetweenInputBatches",
   199  		},
   200  		{
   201  			input: tuples{
   202  				{0, 1},
   203  				{0, 2},
   204  				{0, 3},
   205  				{0, 4},
   206  				{1, 5},
   207  				{2, 6},
   208  			},
   209  			expected: tuples{
   210  				{10},
   211  				{5},
   212  				{6},
   213  			},
   214  			batchSize:       2,
   215  			outputBatchSize: 1,
   216  			name:            "CarryBetweenOutputBatches",
   217  		},
   218  		{
   219  			input: tuples{
   220  				{0, 1},
   221  				{0, 1},
   222  				{1, 2},
   223  				{2, 3},
   224  				{2, 3},
   225  				{3, 4},
   226  				{3, 4},
   227  				{4, 5},
   228  				{5, 6},
   229  				{6, 7},
   230  				{7, 8},
   231  			},
   232  			expected: tuples{
   233  				{2},
   234  				{2},
   235  				{6},
   236  				{8},
   237  				{5},
   238  				{6},
   239  				{7},
   240  				{8},
   241  			},
   242  			batchSize:       3,
   243  			outputBatchSize: 1,
   244  			name:            "CarryBetweenInputAndOutputBatches",
   245  		},
   246  		{
   247  			input: tuples{
   248  				{0, 1},
   249  				{0, 2},
   250  				{0, 3},
   251  				{0, 4},
   252  			},
   253  			expected: tuples{
   254  				{10},
   255  			},
   256  			batchSize:       1,
   257  			outputBatchSize: 1,
   258  			name:            "NoGroupingCols",
   259  			groupCols:       []uint32{},
   260  		},
   261  		{
   262  			input: tuples{
   263  				{1, 0, 0},
   264  				{2, 0, 0},
   265  				{3, 0, 0},
   266  				{4, 0, 0},
   267  			},
   268  			expected: tuples{
   269  				{10},
   270  			},
   271  			batchSize:       1,
   272  			outputBatchSize: 1,
   273  			name:            "UnusedInputColumns",
   274  			typs:            []*types.T{types.Int, types.Int, types.Int},
   275  			groupCols:       []uint32{1, 2},
   276  			aggCols:         [][]uint32{{0}},
   277  		},
   278  		{
   279  			input: tuples{
   280  				{nil, 1},
   281  				{4, 42},
   282  				{nil, 2},
   283  			},
   284  			expected: tuples{
   285  				{3},
   286  				{42},
   287  			},
   288  			name:           "UnorderedWithNullsInGroupingCol",
   289  			unorderedInput: true,
   290  		},
   291  	}
   292  
   293  	// Run tests with deliberate batch sizes and no selection vectors.
   294  	for _, tc := range testCases {
   295  		t.Run(tc.name, func(t *testing.T) {
   296  			if err := tc.init(); err != nil {
   297  				t.Fatal(err)
   298  			}
   299  
   300  			if !tc.unorderedInput {
   301  				tupleSource := newOpTestInput(tc.batchSize, tc.input, nil /* typs */)
   302  				a, err := NewOrderedAggregator(
   303  					testAllocator,
   304  					tupleSource,
   305  					tc.typs,
   306  					tc.aggFns,
   307  					tc.groupCols,
   308  					tc.aggCols,
   309  					false, /* isScalar */
   310  				)
   311  				if err != nil {
   312  					t.Fatal(err)
   313  				}
   314  
   315  				out := newOpTestOutput(a, tc.expected)
   316  				// Explicitly reinitialize the aggregator with the given output batch
   317  				// size.
   318  				a.(*orderedAggregator).initWithInputAndOutputBatchSize(tc.batchSize, tc.outputBatchSize)
   319  				if err := out.VerifyAnyOrder(); err != nil {
   320  					t.Fatal(err)
   321  				}
   322  			}
   323  
   324  			// Run randomized tests on this test case.
   325  			t.Run(fmt.Sprintf("Randomized"), func(t *testing.T) {
   326  				for _, agg := range aggTypes {
   327  					if tc.unorderedInput && agg.name == "ordered" {
   328  						// This test case has unordered input, so we skip ordered
   329  						// aggregator.
   330  						continue
   331  					}
   332  					t.Run(agg.name, func(t *testing.T) {
   333  						runTests(t, []tuples{tc.input}, tc.expected, unorderedVerifier,
   334  							func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   335  								return agg.new(
   336  									testAllocator,
   337  									input[0],
   338  									tc.typs,
   339  									tc.aggFns,
   340  									tc.groupCols,
   341  									tc.aggCols,
   342  									false, /* isScalar */
   343  								)
   344  							})
   345  					})
   346  				}
   347  			})
   348  		})
   349  	}
   350  }
   351  
   352  func TestAggregatorMultiFunc(t *testing.T) {
   353  	defer leaktest.AfterTest(t)()
   354  	testCases := []aggregatorTestCase{
   355  		{
   356  			aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM},
   357  			aggCols: [][]uint32{
   358  				{2}, {1},
   359  			},
   360  			input: tuples{
   361  				{0, 1, 2},
   362  				{0, 1, 2},
   363  			},
   364  			typs: []*types.T{types.Int, types.Int, types.Int},
   365  			expected: tuples{
   366  				{4, 2},
   367  			},
   368  			name: "OutputOrder",
   369  		},
   370  		{
   371  			aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM},
   372  			aggCols: [][]uint32{
   373  				{2}, {1},
   374  			},
   375  			input: tuples{
   376  				{0, 1, 1.3},
   377  				{0, 1, 1.6},
   378  				{0, 1, 0.5},
   379  				{1, 1, 1.2},
   380  			},
   381  			typs: []*types.T{types.Int, types.Int, types.Decimal},
   382  			expected: tuples{
   383  				{3.4, 3},
   384  				{1.2, 1},
   385  			},
   386  			name:          "SumMultiType",
   387  			convToDecimal: true,
   388  		},
   389  		{
   390  			aggFns: []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_AVG, execinfrapb.AggregatorSpec_SUM},
   391  			aggCols: [][]uint32{
   392  				{1}, {1},
   393  			},
   394  			input: tuples{
   395  				{0, 1.1},
   396  				{0, 1.2},
   397  				{0, 2.3},
   398  				{1, 6.21},
   399  				{1, 2.43},
   400  			},
   401  			typs: []*types.T{types.Int, types.Decimal},
   402  			expected: tuples{
   403  				{"1.5333333333333333333", 4.6},
   404  				{4.32, 8.64},
   405  			},
   406  			name:          "AvgSumSingleInputBatch",
   407  			convToDecimal: true,
   408  		},
   409  		{
   410  			aggFns: []execinfrapb.AggregatorSpec_Func{
   411  				execinfrapb.AggregatorSpec_BOOL_AND,
   412  				execinfrapb.AggregatorSpec_BOOL_OR,
   413  			},
   414  			aggCols: [][]uint32{
   415  				{1}, {1},
   416  			},
   417  			input: tuples{
   418  				{0, true},
   419  				{1, false},
   420  				{2, true},
   421  				{2, false},
   422  				{3, true},
   423  				{3, true},
   424  				{4, false},
   425  				{4, false},
   426  				{5, false},
   427  				{5, nil},
   428  				{6, nil},
   429  				{6, true},
   430  				{7, nil},
   431  				{7, false},
   432  				{7, true},
   433  				{8, nil},
   434  				{8, nil},
   435  			},
   436  			typs: []*types.T{types.Int, types.Bool},
   437  			expected: tuples{
   438  				{true, true},
   439  				{false, false},
   440  				{false, true},
   441  				{true, true},
   442  				{false, false},
   443  				{false, false},
   444  				{true, true},
   445  				{false, true},
   446  				{nil, nil},
   447  			},
   448  			name: "BoolAndOrBatch",
   449  		},
   450  		{
   451  			aggFns: []execinfrapb.AggregatorSpec_Func{
   452  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   453  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   454  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   455  				execinfrapb.AggregatorSpec_MIN,
   456  				execinfrapb.AggregatorSpec_SUM_INT,
   457  			},
   458  			input: tuples{
   459  				{2, 1.0, "1.0", 2.0},
   460  				{2, 1.0, "1.0", 4.0},
   461  				{2, 2.0, "2.0", 6.0},
   462  			},
   463  			expected: tuples{
   464  				{2, 1.0, "1.0", 2.0, 6.0},
   465  				{2, 2.0, "2.0", 6.0, 6.0},
   466  			},
   467  			batchSize: 1,
   468  			typs:      []*types.T{types.Int, types.Decimal, types.Bytes, types.Decimal},
   469  			name:      "MultiGroupColsWithPointerTypes",
   470  			groupCols: []uint32{0, 1, 2},
   471  			aggCols: [][]uint32{
   472  				{0}, {1}, {2}, {3}, {3},
   473  			},
   474  		},
   475  		{
   476  			aggFns: []execinfrapb.AggregatorSpec_Func{
   477  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   478  				execinfrapb.AggregatorSpec_SUM_INT,
   479  			},
   480  			input: tuples{
   481  				{`{"id": null}`, -1},
   482  				{`{"id": 0, "data": "s1"}`, 1},
   483  				{`{"id": 0, "data": "s1"}`, 2},
   484  				{`{"id": 1, "data": "s2"}`, 10},
   485  				{`{"id": 1, "data": "s2"}`, 11},
   486  				{`{"id": 2, "data": "s3"}`, 100},
   487  				{`{"id": 2, "data": "s3"}`, 101},
   488  				{`{"id": 2, "data": "s4"}`, 102},
   489  			},
   490  			expected: tuples{
   491  				{`{"id": null}`, -1},
   492  				{`{"id": 0, "data": "s1"}`, 3},
   493  				{`{"id": 1, "data": "s2"}`, 21},
   494  				{`{"id": 2, "data": "s3"}`, 201},
   495  				{`{"id": 2, "data": "s4"}`, 102},
   496  			},
   497  			typs:      []*types.T{types.Jsonb, types.Int},
   498  			name:      "GroupOnJsonColumns",
   499  			groupCols: []uint32{0},
   500  			aggCols: [][]uint32{
   501  				{0}, {1},
   502  			},
   503  		},
   504  	}
   505  
   506  	for _, agg := range aggTypes {
   507  		for _, tc := range testCases {
   508  			t.Run(fmt.Sprintf("%s/%s/Randomized", agg.name, tc.name), func(t *testing.T) {
   509  				if err := tc.init(); err != nil {
   510  					t.Fatal(err)
   511  				}
   512  				runTestsWithTyps(t, []tuples{tc.input}, [][]*types.T{tc.typs}, tc.expected, unorderedVerifier,
   513  					func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   514  						return agg.new(testAllocator, input[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols, false /* isScalar */)
   515  					})
   516  			})
   517  		}
   518  	}
   519  }
   520  
   521  func TestAggregatorAllFunctions(t *testing.T) {
   522  	defer leaktest.AfterTest(t)()
   523  	testCases := []aggregatorTestCase{
   524  		{
   525  			aggFns: []execinfrapb.AggregatorSpec_Func{
   526  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   527  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   528  				execinfrapb.AggregatorSpec_AVG,
   529  				execinfrapb.AggregatorSpec_COUNT_ROWS,
   530  				execinfrapb.AggregatorSpec_COUNT,
   531  				execinfrapb.AggregatorSpec_SUM,
   532  				execinfrapb.AggregatorSpec_MIN,
   533  				execinfrapb.AggregatorSpec_MAX,
   534  				execinfrapb.AggregatorSpec_BOOL_AND,
   535  				execinfrapb.AggregatorSpec_BOOL_OR,
   536  			},
   537  			aggCols: [][]uint32{{0}, {4}, {1}, {}, {1}, {2}, {2}, {2}, {3}, {3}},
   538  			typs:    []*types.T{types.Int, types.Decimal, types.Int, types.Bool, types.Bytes},
   539  			input: tuples{
   540  				{0, 3.1, 2, true, "zero"},
   541  				{0, 1.1, 3, false, "zero"},
   542  				{1, 1.1, 1, false, "one"},
   543  				{1, 4.1, 0, false, "one"},
   544  				{2, 1.1, 1, true, "two"},
   545  				{3, 4.1, 0, false, "three"},
   546  				{3, 5.1, 0, true, "three"},
   547  			},
   548  			expected: tuples{
   549  				{0, "zero", 2.1, 2, 2, 5, 2, 3, false, true},
   550  				{1, "one", 2.6, 2, 2, 1, 0, 1, false, false},
   551  				{2, "two", 1.1, 1, 1, 1, 1, 1, true, true},
   552  				{3, "three", 4.6, 2, 2, 0, 0, 0, false, true},
   553  			},
   554  			convToDecimal: true,
   555  		},
   556  
   557  		// Test case for null handling.
   558  		{
   559  			aggFns: []execinfrapb.AggregatorSpec_Func{
   560  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   561  				execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   562  				execinfrapb.AggregatorSpec_COUNT_ROWS,
   563  				execinfrapb.AggregatorSpec_COUNT,
   564  				execinfrapb.AggregatorSpec_SUM,
   565  				execinfrapb.AggregatorSpec_SUM_INT,
   566  				execinfrapb.AggregatorSpec_MIN,
   567  				execinfrapb.AggregatorSpec_MAX,
   568  				execinfrapb.AggregatorSpec_AVG,
   569  				execinfrapb.AggregatorSpec_BOOL_AND,
   570  				execinfrapb.AggregatorSpec_BOOL_OR,
   571  			},
   572  			aggCols: [][]uint32{{0}, {1}, {}, {1}, {1}, {2}, {2}, {2}, {1}, {3}, {3}},
   573  			typs:    []*types.T{types.Int, types.Decimal, types.Int, types.Bool},
   574  			input: tuples{
   575  				{nil, 1.1, 4, true},
   576  				{0, nil, nil, nil},
   577  				{0, 3.1, 5, nil},
   578  				{1, nil, nil, nil},
   579  				{1, nil, nil, false},
   580  			},
   581  			expected: tuples{
   582  				{nil, 1.1, 1, 1, 1.1, 4, 4, 4, 1.1, true, true},
   583  				{0, 3.1, 2, 1, 3.1, 5, 5, 5, 3.1, nil, nil},
   584  				{1, nil, 2, 0, nil, nil, nil, nil, nil, false, false},
   585  			},
   586  			convToDecimal: true,
   587  		},
   588  	}
   589  
   590  	for _, agg := range aggTypes {
   591  		for i, tc := range testCases {
   592  			t.Run(fmt.Sprintf("%s/%d", agg.name, i), func(t *testing.T) {
   593  				if err := tc.init(); err != nil {
   594  					t.Fatal(err)
   595  				}
   596  				verifier := orderedVerifier
   597  				if strings.Contains(agg.name, "hash") {
   598  					verifier = unorderedVerifier
   599  				}
   600  				runTests(
   601  					t,
   602  					[]tuples{tc.input},
   603  					tc.expected,
   604  					verifier,
   605  					func(input []colexecbase.Operator) (colexecbase.Operator, error) {
   606  						return agg.new(testAllocator, input[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols, false /* isScalar */)
   607  					})
   608  			})
   609  		}
   610  	}
   611  }
   612  
   613  func TestAggregatorRandom(t *testing.T) {
   614  	defer leaktest.AfterTest(t)()
   615  
   616  	// This test aggregates random inputs, keeping track of the expected results
   617  	// to make sure the aggregations are correct.
   618  	rng, _ := randutil.NewPseudoRand()
   619  	for _, groupSize := range []int{1, 2, coldata.BatchSize() / 4, coldata.BatchSize() / 2} {
   620  		if groupSize == 0 {
   621  			// We might be varying coldata.BatchSize() so that when it is divided by
   622  			// 4, groupSize is 0. We want to skip such configuration.
   623  			continue
   624  		}
   625  		for _, numInputBatches := range []int{1, 2, 64} {
   626  			for _, hasNulls := range []bool{true, false} {
   627  				for _, agg := range aggTypes {
   628  					t.Run(fmt.Sprintf("%s/groupSize=%d/numInputBatches=%d/hasNulls=%t", agg.name, groupSize, numInputBatches, hasNulls),
   629  						func(t *testing.T) {
   630  							nTuples := coldata.BatchSize() * numInputBatches
   631  							typs := []*types.T{types.Int, types.Float}
   632  							cols := []coldata.Vec{
   633  								testAllocator.NewMemColumn(typs[0], nTuples),
   634  								testAllocator.NewMemColumn(typs[1], nTuples),
   635  							}
   636  							groups, aggCol, aggColNulls := cols[0].Int64(), cols[1].Float64(), cols[1].Nulls()
   637  							expectedTuples := tuples{}
   638  
   639  							var expRowCounts, expCounts []int64
   640  							var expSums, expMins, expMaxs []float64
   641  							// SUM, MIN, MAX, and AVG aggregators can output null.
   642  							var expNulls []bool
   643  							curGroup := -1
   644  							for i := range groups {
   645  								if i%groupSize == 0 {
   646  									if curGroup != -1 {
   647  										if expNulls[curGroup] {
   648  											expectedTuples = append(expectedTuples, tuple{
   649  												expRowCounts[curGroup], expCounts[curGroup], nil, nil, nil, nil,
   650  											})
   651  										} else {
   652  											expectedTuples = append(expectedTuples, tuple{
   653  												expRowCounts[curGroup], expCounts[curGroup], expSums[curGroup], expMins[curGroup], expMaxs[curGroup], expSums[curGroup] / float64(expCounts[curGroup]),
   654  											})
   655  										}
   656  									}
   657  									expRowCounts = append(expRowCounts, 0)
   658  									expCounts = append(expCounts, 0)
   659  									expSums = append(expSums, 0)
   660  									expMins = append(expMins, 2048)
   661  									expMaxs = append(expMaxs, -2048)
   662  									expNulls = append(expNulls, true)
   663  									curGroup++
   664  								}
   665  								// Keep the inputs small so they are a realistic size. Using a
   666  								// large range is not realistic and makes decimal operations
   667  								// slower.
   668  								aggCol[i] = 2048 * (rng.Float64() - 0.5)
   669  
   670  								// NULL values contribute to the row count, so we're updating
   671  								// the row counts outside of the if block.
   672  								expRowCounts[curGroup]++
   673  								if hasNulls && rng.Float64() < nullProbability {
   674  									aggColNulls.SetNull(i)
   675  								} else {
   676  									expNulls[curGroup] = false
   677  									expCounts[curGroup]++
   678  									expSums[curGroup] += aggCol[i]
   679  									expMins[curGroup] = min64(aggCol[i], expMins[curGroup])
   680  									expMaxs[curGroup] = max64(aggCol[i], expMaxs[curGroup])
   681  								}
   682  								groups[i] = int64(curGroup)
   683  							}
   684  							// Add result for last group.
   685  							if expNulls[curGroup] {
   686  								expectedTuples = append(expectedTuples, tuple{
   687  									expRowCounts[curGroup], expCounts[curGroup], nil, nil, nil, nil,
   688  								})
   689  							} else {
   690  								expectedTuples = append(expectedTuples, tuple{
   691  									expRowCounts[curGroup], expCounts[curGroup], expSums[curGroup], expMins[curGroup], expMaxs[curGroup], expSums[curGroup] / float64(expCounts[curGroup]),
   692  								})
   693  							}
   694  
   695  							source := newChunkingBatchSource(typs, cols, nTuples)
   696  							a, err := agg.new(
   697  								testAllocator,
   698  								source,
   699  								typs,
   700  								[]execinfrapb.AggregatorSpec_Func{
   701  									execinfrapb.AggregatorSpec_COUNT_ROWS,
   702  									execinfrapb.AggregatorSpec_COUNT,
   703  									execinfrapb.AggregatorSpec_SUM_INT,
   704  									execinfrapb.AggregatorSpec_MIN,
   705  									execinfrapb.AggregatorSpec_MAX,
   706  									execinfrapb.AggregatorSpec_AVG},
   707  								[]uint32{0},
   708  								[][]uint32{{}, {1}, {1}, {1}, {1}, {1}},
   709  								false, /* isScalar */
   710  							)
   711  							if err != nil {
   712  								t.Fatal(err)
   713  							}
   714  							a.Init()
   715  
   716  							testOutput := newOpTestOutput(a, expectedTuples)
   717  							if strings.Contains(agg.name, "hash") {
   718  								err = testOutput.VerifyAnyOrder()
   719  							} else {
   720  								err = testOutput.Verify()
   721  							}
   722  
   723  							if err != nil {
   724  								t.Fatal(err)
   725  							}
   726  						})
   727  				}
   728  			}
   729  		}
   730  	}
   731  }
   732  
   733  func BenchmarkAggregator(b *testing.B) {
   734  	rng, _ := randutil.NewPseudoRand()
   735  	ctx := context.Background()
   736  
   737  	const bytesFixedLength = 8
   738  	for _, aggFn := range []execinfrapb.AggregatorSpec_Func{
   739  		execinfrapb.AggregatorSpec_ANY_NOT_NULL,
   740  		execinfrapb.AggregatorSpec_AVG,
   741  		execinfrapb.AggregatorSpec_COUNT_ROWS,
   742  		execinfrapb.AggregatorSpec_COUNT,
   743  		execinfrapb.AggregatorSpec_SUM,
   744  		execinfrapb.AggregatorSpec_MIN,
   745  		execinfrapb.AggregatorSpec_MAX,
   746  		execinfrapb.AggregatorSpec_BOOL_AND,
   747  		execinfrapb.AggregatorSpec_BOOL_OR,
   748  	} {
   749  		fName := execinfrapb.AggregatorSpec_Func_name[int32(aggFn)]
   750  		b.Run(fName, func(b *testing.B) {
   751  			for _, agg := range aggTypes {
   752  				for typIdx, typ := range []*types.T{types.Int, types.Decimal, types.Bytes} {
   753  					for _, groupSize := range []int{1, 2, coldata.BatchSize() / 2, coldata.BatchSize()} {
   754  						for _, hasNulls := range []bool{false, true} {
   755  							for _, numInputBatches := range []int{64} {
   756  								if aggFn == execinfrapb.AggregatorSpec_BOOL_AND || aggFn == execinfrapb.AggregatorSpec_BOOL_OR {
   757  									typ = types.Bool
   758  									if typIdx > 0 {
   759  										// We don't need to run the benchmark of bool_and and
   760  										// bool_or multiple times, so we skip all runs except
   761  										// for the first one.
   762  										continue
   763  									}
   764  								}
   765  								b.Run(fmt.Sprintf("%s/%s/groupSize=%d/hasNulls=%t/numInputBatches=%d", agg.name, typ.String(),
   766  									groupSize, hasNulls, numInputBatches),
   767  									func(b *testing.B) {
   768  										typs := []*types.T{types.Int, typ}
   769  										nTuples := numInputBatches * coldata.BatchSize()
   770  										cols := []coldata.Vec{
   771  											testAllocator.NewMemColumn(types.Int, nTuples),
   772  											testAllocator.NewMemColumn(typ, nTuples),
   773  										}
   774  										groups := cols[0].Int64()
   775  										curGroup := -1
   776  										for i := 0; i < nTuples; i++ {
   777  											if groupSize == 1 || i%groupSize == 0 {
   778  												curGroup++
   779  											}
   780  											groups[i] = int64(curGroup)
   781  										}
   782  										nullProb := 0.0
   783  										if hasNulls {
   784  											nullProb = nullProbability
   785  										}
   786  										coldatatestutils.RandomVec(coldatatestutils.RandomVecArgs{
   787  											Rand:             rng,
   788  											Vec:              cols[1],
   789  											N:                nTuples,
   790  											NullProbability:  nullProb,
   791  											BytesFixedLength: bytesFixedLength,
   792  										})
   793  										if typ.Identical(types.Int) && aggFn == execinfrapb.AggregatorSpec_SUM {
   794  											// Summation of random Int64 values can lead to
   795  											// overflow, and we will panic. To go around it, we
   796  											// restrict the range of values.
   797  											vals := cols[1].Int64()
   798  											for i := range vals {
   799  												vals[i] = vals[i] % 1024
   800  											}
   801  										}
   802  										source := newChunkingBatchSource(typs, cols, nTuples)
   803  
   804  										nCols := 1
   805  										if aggFn == execinfrapb.AggregatorSpec_COUNT_ROWS {
   806  											nCols = 0
   807  										}
   808  										a, err := agg.new(
   809  											testAllocator,
   810  											source,
   811  											typs,
   812  											[]execinfrapb.AggregatorSpec_Func{aggFn},
   813  											[]uint32{0},
   814  											[][]uint32{[]uint32{1}[:nCols]},
   815  											false, /* isScalar */
   816  										)
   817  										if err != nil {
   818  											b.Skip()
   819  										}
   820  										a.Init()
   821  
   822  										b.ResetTimer()
   823  
   824  										// Only count the int64 column.
   825  										b.SetBytes(int64(8 * nTuples))
   826  										for i := 0; i < b.N; i++ {
   827  											a.(resetter).reset(ctx)
   828  											source.reset()
   829  											// Exhaust aggregator until all batches have been read.
   830  											for b := a.Next(ctx); b.Length() != 0; b = a.Next(ctx) {
   831  											}
   832  										}
   833  									},
   834  								)
   835  							}
   836  						}
   837  					}
   838  				}
   839  			}
   840  		})
   841  	}
   842  }
   843  
   844  func TestHashAggregator(t *testing.T) {
   845  	defer leaktest.AfterTest(t)()
   846  	tcs := []aggregatorTestCase{
   847  		{
   848  			// Test carry between output batches.
   849  			input: tuples{
   850  				{0, 1},
   851  				{1, 5},
   852  				{0, 4},
   853  				{0, 2},
   854  				{2, 6},
   855  				{0, 3},
   856  				{0, 7},
   857  			},
   858  			typs:      []*types.T{types.Int, types.Int},
   859  			groupCols: []uint32{0},
   860  			aggCols:   [][]uint32{{1}},
   861  
   862  			expected: tuples{
   863  				{5},
   864  				{6},
   865  				{17},
   866  			},
   867  
   868  			name: "carryBetweenBatches",
   869  		},
   870  		{
   871  			// Test a single row input source.
   872  			input: tuples{
   873  				{5},
   874  			},
   875  			typs:      []*types.T{types.Int},
   876  			groupCols: []uint32{0},
   877  			aggCols:   [][]uint32{{0}},
   878  
   879  			expected: tuples{
   880  				{5},
   881  			},
   882  
   883  			name: "singleRowInput",
   884  		},
   885  		{
   886  			// Test bucket collisions.
   887  			input: tuples{
   888  				{0, 3},
   889  				{0, 4},
   890  				{hashTableNumBuckets, 6},
   891  				{0, 5},
   892  				{hashTableNumBuckets, 7},
   893  			},
   894  			typs:      []*types.T{types.Int, types.Int},
   895  			groupCols: []uint32{0},
   896  			aggCols:   [][]uint32{{1}},
   897  
   898  			expected: tuples{
   899  				{12},
   900  				{13},
   901  			},
   902  
   903  			name: "bucketCollision",
   904  		},
   905  		{
   906  			input: tuples{
   907  				{0, 1, 1.3},
   908  				{0, 1, 1.6},
   909  				{0, 1, 0.5},
   910  				{1, 1, 1.2},
   911  			},
   912  			typs:          []*types.T{types.Int, types.Int, types.Decimal},
   913  			convToDecimal: true,
   914  
   915  			aggFns:    []execinfrapb.AggregatorSpec_Func{execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM},
   916  			groupCols: []uint32{0, 1},
   917  			aggCols: [][]uint32{
   918  				{2}, {1},
   919  			},
   920  
   921  			expected: tuples{
   922  				{3.4, 3},
   923  				{1.2, 1},
   924  			},
   925  
   926  			name: "decimalSums",
   927  		},
   928  		{
   929  			// Test unused input columns.
   930  			input: tuples{
   931  				{0, 1, 2, 3},
   932  				{0, 1, 4, 5},
   933  				{1, 1, 3, 7},
   934  				{1, 2, 4, 9},
   935  				{0, 1, 6, 11},
   936  				{1, 2, 6, 13},
   937  			},
   938  			typs:      []*types.T{types.Int, types.Int, types.Int, types.Int},
   939  			groupCols: []uint32{0, 1},
   940  			aggCols:   [][]uint32{{3}},
   941  
   942  			expected: tuples{
   943  				{7},
   944  				{19},
   945  				{22},
   946  			},
   947  
   948  			name: "unusedInputCol",
   949  		},
   950  	}
   951  
   952  	for _, numOfHashBuckets := range []int{0 /* no limit */, 1, coldata.BatchSize()} {
   953  		for _, tc := range tcs {
   954  			if err := tc.init(); err != nil {
   955  				t.Fatal(err)
   956  			}
   957  			t.Run(fmt.Sprintf("numOfHashBuckets=%d", numOfHashBuckets), func(t *testing.T) {
   958  				runTests(t, []tuples{tc.input}, tc.expected, unorderedVerifier, func(sources []colexecbase.Operator) (colexecbase.Operator, error) {
   959  					a, err := NewHashAggregator(testAllocator, sources[0], tc.typs, tc.aggFns, tc.groupCols, tc.aggCols)
   960  					a.(*hashAggregator).testingKnobs.numOfHashBuckets = uint64(numOfHashBuckets)
   961  					return a, err
   962  				})
   963  			})
   964  		}
   965  	}
   966  }
   967  
   968  func min64(a, b float64) float64 {
   969  	if a < b {
   970  		return a
   971  	}
   972  	return b
   973  }
   974  
   975  func max64(a, b float64) float64 {
   976  	if a > b {
   977  		return a
   978  	}
   979  	return b
   980  }