github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colexec/aggregate_funcs.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package colexec

import (
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/col/coldata"
	"github.com/cockroachdb/cockroach/pkg/sql/colmem"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/errors"
)

// SupportedAggFns contains all aggregate functions supported by the vectorized
// engine.
var SupportedAggFns = []execinfrapb.AggregatorSpec_Func{
	execinfrapb.AggregatorSpec_ANY_NOT_NULL,
	execinfrapb.AggregatorSpec_AVG,
	execinfrapb.AggregatorSpec_SUM,
	execinfrapb.AggregatorSpec_SUM_INT,
	execinfrapb.AggregatorSpec_COUNT_ROWS,
	execinfrapb.AggregatorSpec_COUNT,
	execinfrapb.AggregatorSpec_MIN,
	execinfrapb.AggregatorSpec_MAX,
	execinfrapb.AggregatorSpec_BOOL_AND,
	execinfrapb.AggregatorSpec_BOOL_OR,
}

// aggregateFunc is an aggregate function that performs computation on a batch
// when Compute(batch) is called and writes the output to the Vec passed in
// Init. The aggregateFunc performs an aggregation per group and outputs the
// aggregation once the end of the group is reached. If the end of the group is
// not reached before the batch is finished, the aggregateFunc will store a
// carry value that it will use next time Compute is called. Note that this
// carry value is stored at the output index. Therefore, if any memory
// modification of the output vector is made, the caller *MUST* copy the value
// at the current index inclusive for a correct aggregation.
type aggregateFunc interface {
	// Init sets the groups for the aggregation and the output vector. Each index
	// in groups corresponds to a column value in the input batch. true represents
	// the first value of a new group.
	Init(groups []bool, vec coldata.Vec)

	// Reset resets the aggregate function for another run. Primarily used for
	// benchmarks.
	Reset()

	// CurrentOutputIndex returns the current index in the output vector that the
	// aggregate function is writing to. All indices < the index returned are
	// finished aggregations for previous groups. A negative index may be returned
	// to signify an aggregate function that has not yet performed any
	// computation.
	CurrentOutputIndex() int
	// SetOutputIndex sets the output index to write to. The value for the current
	// index is carried over. Note that calling SetOutputIndex is a noop if
	// CurrentOutputIndex returns a negative value (i.e. the aggregate function
	// has not yet performed any computation). This method also has the side
	// effect of clearing the NULLs bitmap of the output buffer past the given
	// index.
	SetOutputIndex(idx int)

	// Compute computes the aggregation on the input batch.
	// Note: the implementations should be careful to account for their memory
	// usage.
	Compute(batch coldata.Batch, inputIdxs []uint32)

	// Flush flushes the result of aggregation on the last group. It should be
	// called once after input batches have been Compute()'d.
	// Note: the implementations are free to not account for the memory used
	// for the result of aggregation of the last group.
	Flush()

	// HandleEmptyInputScalar populates the output for a case of an empty input
	// when the aggregate function is in scalar context. The output must always
	// be a single value (either null or zero, depending on the function).
	// TODO(yuzefovich): we can pull scratch field of aggregates into a shared
	// aggregator and implement this method once on the shared base.
	HandleEmptyInputScalar()
}
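
// runAggregateFunc is a minimal illustrative sketch (not part of the original
// file) of the lifecycle described above: Init is called once with the
// group-boundary bitmap and the output vector, Compute is called for every
// input batch, and Flush is called once at the end so that the aggregation of
// the last (possibly still open) group is written out at the current output
// index. Using input column 0 as the single aggregated column is an assumption
// made here purely for illustration.
func runAggregateFunc(fn aggregateFunc, batches []coldata.Batch, groups []bool, out coldata.Vec) {
	fn.Init(groups, out)
	for _, batch := range batches {
		fn.Compute(batch, []uint32{0} /* inputIdxs */)
	}
	// Without Flush, the result of the last group would only exist as the
	// carry value stored at the current output index.
	fn.Flush()
}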

// aggregateFuncAlloc is an aggregate function allocator that pools allocations
// of the structs of the same statically-typed aggregate function.
type aggregateFuncAlloc interface {
	// newAggFunc returns the aggregate function from the pool with all
	// necessary fields initialized.
	newAggFunc() aggregateFunc
}

// aggregateFuncsAlloc is a utility struct that pools allocations of multiple
// aggregate functions simultaneously (i.e. it supports a "schema of aggregate
// functions"). It will resolve the aggregate functions in its constructor to
// instantiate aggregateFuncAlloc objects and will use those to populate slices
// of new aggregation functions when requested.
type aggregateFuncsAlloc struct {
	allocator *colmem.Allocator
	// allocSize determines the number of objects allocated when the previous
	// allocations have been used up.
	allocSize int64
	// returnFuncs is the pool for the slice to be returned in
	// makeAggregateFuncs.
	returnFuncs []aggregateFunc
	// aggFuncAllocs are all necessary aggregate function allocators. Note that
	// a separate aggregateFuncAlloc will be created for each aggFn from the
	// schema (even if there are "duplicates" - exactly the same functions - in
	// the function schema).
	aggFuncAllocs []aggregateFuncAlloc
}

func newAggregateFuncsAlloc(
	allocator *colmem.Allocator,
	aggTyps [][]*types.T,
	aggFns []execinfrapb.AggregatorSpec_Func,
	allocSize int64,
) (*aggregateFuncsAlloc, error) {
	funcAllocs := make([]aggregateFuncAlloc, len(aggFns))
	for i := range aggFns {
		var err error
		switch aggFns[i] {
		case execinfrapb.AggregatorSpec_ANY_NOT_NULL:
			funcAllocs[i], err = newAnyNotNullAggAlloc(allocator, aggTyps[i][0], allocSize)
		case execinfrapb.AggregatorSpec_AVG:
			funcAllocs[i], err = newAvgAggAlloc(allocator, aggTyps[i][0], allocSize)
		case execinfrapb.AggregatorSpec_SUM, execinfrapb.AggregatorSpec_SUM_INT:
			funcAllocs[i], err = newSumAggAlloc(allocator, aggTyps[i][0], allocSize)
		case execinfrapb.AggregatorSpec_COUNT_ROWS:
			funcAllocs[i] = newCountRowsAggAlloc(allocator, allocSize)
		case execinfrapb.AggregatorSpec_COUNT:
			funcAllocs[i] = newCountAggAlloc(allocator, allocSize)
		case execinfrapb.AggregatorSpec_MIN:
			funcAllocs[i], err = newMinAggAlloc(allocator, aggTyps[i][0], allocSize)
		case execinfrapb.AggregatorSpec_MAX:
			funcAllocs[i], err = newMaxAggAlloc(allocator, aggTyps[i][0], allocSize)
		case execinfrapb.AggregatorSpec_BOOL_AND:
			funcAllocs[i] = newBoolAndAggAlloc(allocator, allocSize)
		case execinfrapb.AggregatorSpec_BOOL_OR:
			funcAllocs[i] = newBoolOrAggAlloc(allocator, allocSize)
		// NOTE: if you're adding an implementation of a new aggregate
		// function, make sure to account for the memory under that struct in
		// its constructor.
		default:
			return nil, errors.Errorf("unsupported columnar aggregate function %s", aggFns[i].String())
		}

		if err != nil {
			return nil, err
		}
	}
	return &aggregateFuncsAlloc{
		allocator:     allocator,
		allocSize:     allocSize,
		aggFuncAllocs: funcAllocs,
	}, nil
}
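
// newAggregateFuncsAllocExample is an illustrative sketch (not part of the
// original file) of how the constructor's arguments line up: aggFns describes
// the "schema of aggregate functions" and aggTyps[i] holds the input types of
// aggFns[i]. The particular schema (ANY_NOT_NULL on INT, SUM on DECIMAL) and
// the allocSize of 64 are assumptions chosen for illustration only.
func newAggregateFuncsAllocExample(allocator *colmem.Allocator) (*aggregateFuncsAlloc, error) {
	return newAggregateFuncsAlloc(
		allocator,
		[][]*types.T{{types.Int}, {types.Decimal}},
		[]execinfrapb.AggregatorSpec_Func{
			execinfrapb.AggregatorSpec_ANY_NOT_NULL,
			execinfrapb.AggregatorSpec_SUM,
		},
		64, /* allocSize */
	)
}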

// sizeOfAggregateFunc is the size of some aggregateFunc implementation.
// countAgg was chosen arbitrarily, but it's important that we use a pointer to
// the aggregate function struct.
const sizeOfAggregateFunc = int64(unsafe.Sizeof(&countAgg{}))

func (a *aggregateFuncsAlloc) makeAggregateFuncs() []aggregateFunc {
	if len(a.returnFuncs) == 0 {
		// We have exhausted the previously allocated pools of objects, so we
		// need to allocate a new slice for a.returnFuncs, and we need it to be
		// of 'allocSize x number of funcs in schema' length. Every
		// aggFuncAlloc will allocate allocSize objects on the newAggFunc
		// call below.
		a.allocator.AdjustMemoryUsage(sizeOfAggregateFunc * a.allocSize)
		a.returnFuncs = make([]aggregateFunc, len(a.aggFuncAllocs)*int(a.allocSize))
	}
	funcs := a.returnFuncs[:len(a.aggFuncAllocs)]
	a.returnFuncs = a.returnFuncs[len(a.aggFuncAllocs):]
	for i, alloc := range a.aggFuncAllocs {
		funcs[i] = alloc.newAggFunc()
	}
	return funcs
}

func makeAggregateFuncsOutputTypes(
	aggTyps [][]*types.T, aggFns []execinfrapb.AggregatorSpec_Func,
) ([]*types.T, error) {
	outTyps := make([]*types.T, len(aggFns))

	for i := range aggFns {
		// Set the output type of the aggregate.
		switch aggFns[i] {
		case execinfrapb.AggregatorSpec_COUNT_ROWS, execinfrapb.AggregatorSpec_COUNT:
			// TODO(jordan): this is somewhat of a hack. The aggregate functions
			// should come with their own output types, somehow.
			outTyps[i] = types.Int
		case
			execinfrapb.AggregatorSpec_ANY_NOT_NULL,
			execinfrapb.AggregatorSpec_AVG,
			execinfrapb.AggregatorSpec_SUM,
			execinfrapb.AggregatorSpec_SUM_INT,
			execinfrapb.AggregatorSpec_MIN,
			execinfrapb.AggregatorSpec_MAX,
			execinfrapb.AggregatorSpec_BOOL_AND,
			execinfrapb.AggregatorSpec_BOOL_OR:
			// Output types are the input types for now.
			outTyps[i] = aggTyps[i][0]
		default:
			return nil, errors.Errorf("unsupported columnar aggregate function %s", aggFns[i].String())
		}
	}

	return outTyps, nil
}

// extractAggTypes returns a nested array representing the input types
// corresponding to each aggregation function.
func extractAggTypes(aggCols [][]uint32, typs []*types.T) [][]*types.T {
	aggTyps := make([][]*types.T, len(aggCols))

	for aggIdx := range aggCols {
		aggTyps[aggIdx] = make([]*types.T, len(aggCols[aggIdx]))
		for i, colIdx := range aggCols[aggIdx] {
			aggTyps[aggIdx][i] = typs[colIdx]
		}
	}

	return aggTyps
}
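
// extractAggTypesExample is an illustrative sketch (not part of the original
// file): for an input schema of (INT, STRING, DECIMAL) and two aggregates that
// operate on columns 0 and 2 respectively, extractAggTypes returns
// [[INT], [DECIMAL]]. The particular schema is an assumption chosen for
// illustration only.
func extractAggTypesExample() [][]*types.T {
	typs := []*types.T{types.Int, types.String, types.Decimal}
	aggCols := [][]uint32{{0}, {2}}
	// Result: [][]*types.T{{types.Int}, {types.Decimal}}.
	return extractAggTypes(aggCols, typs)
}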

// isAggregateSupported returns whether the aggregate function that operates on
// columns of types 'inputTypes' (which can be empty in case of COUNT_ROWS) is
// supported.
func isAggregateSupported(
	allocator *colmem.Allocator, aggFn execinfrapb.AggregatorSpec_Func, inputTypes []*types.T,
) (bool, error) {
	switch aggFn {
	case execinfrapb.AggregatorSpec_SUM:
		switch inputTypes[0].Family() {
		case types.IntFamily:
			// TODO(alfonso): plan ordinary SUM on integer types by casting to DECIMAL
			// at the end, mod issues with overflow. Perhaps, to avoid the overflow
			// issues, we could at first plan SUM for all types besides Int64.
			return false, errors.Newf("sum on int cols not supported (use sum_int)")
		}
	case execinfrapb.AggregatorSpec_SUM_INT:
		// TODO(yuzefovich): support this case through vectorize.
		if inputTypes[0].Width() != 64 {
			return false, errors.Newf("sum_int is only supported on Int64 through vectorized")
		}
	}

	// We're only interested in resolving the aggregate functions and will not
	// be actually creating them with the alloc, so we use 0 as the allocation
	// size.
	_, err := newAggregateFuncsAlloc(
		allocator,
		[][]*types.T{inputTypes},
		[]execinfrapb.AggregatorSpec_Func{aggFn},
		0, /* allocSize */
	)
	if err != nil {
		return false, err
	}
	outputTypes, err := makeAggregateFuncsOutputTypes(
		[][]*types.T{inputTypes},
		[]execinfrapb.AggregatorSpec_Func{aggFn},
	)
	if err != nil {
		return false, err
	}
	_, retType, err := execinfrapb.GetAggregateInfo(aggFn, inputTypes...)
	if err != nil {
		return false, err
	}
	// The columnar aggregates will return the same physical output type as their
	// input. However, our current builtin resolution might say that the return
	// type is the canonical type for the family (for example, MAX on INT4 is said
	// to return INT8), so we explicitly check whether the type the columnar
	// aggregate returns and the type the planning code will expect it to return
	// are the same. If they are not, we fall back to the row-by-row engine.
	if !retType.Identical(outputTypes[0]) {
		// TODO(yuzefovich): support this case through vectorize. Probably it needs
		// to be done at the same time as #38845.
		return false, errors.Newf("aggregates with different input and output types are not supported")
	}
	return true, nil
}
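
// planAggregateFuncsExample is an illustrative sketch (not part of the
// original file) that ties the helpers above together the way a planner
// might: extract the per-aggregate input types from the column indices,
// verify that each aggregate is supported by the vectorized engine, and then
// build the allocator-backed pool of aggregate functions. The allocSize of 64
// is an assumption chosen for illustration only.
func planAggregateFuncsExample(
	allocator *colmem.Allocator,
	aggCols [][]uint32,
	aggFns []execinfrapb.AggregatorSpec_Func,
	inputTypes []*types.T,
) ([]aggregateFunc, error) {
	aggTyps := extractAggTypes(aggCols, inputTypes)
	for i := range aggFns {
		if _, err := isAggregateSupported(allocator, aggFns[i], aggTyps[i]); err != nil {
			return nil, err
		}
	}
	alloc, err := newAggregateFuncsAlloc(allocator, aggTyps, aggFns, 64 /* allocSize */)
	if err != nil {
		return nil, err
	}
	return alloc.makeAggregateFuncs(), nil
}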