github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/memristed/memex/aggregation/aggregation.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package aggregation 15 16 import ( 17 "bytes" 18 "strings" 19 20 "github.com/whtcorpsinc/BerolinaSQL/ast" 21 "github.com/whtcorpsinc/errors" 22 "github.com/whtcorpsinc/fidelpb/go-fidelpb" 23 "github.com/whtcorpsinc/milevadb/ekv" 24 "github.com/whtcorpsinc/milevadb/memex" 25 "github.com/whtcorpsinc/milevadb/soliton/chunk" 26 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 27 "github.com/whtcorpsinc/milevadb/types" 28 ) 29 30 // Aggregation stands for aggregate functions. 31 type Aggregation interface { 32 // UFIDelate during executing. 33 UFIDelate(evalCtx *AggEvaluateContext, sc *stmtctx.StatementContext, event chunk.Event) error 34 35 // GetPartialResult will called by interlock to get partial results. For avg function, partial results will return 36 // sum and count values at the same time. 37 GetPartialResult(evalCtx *AggEvaluateContext) []types.Causet 38 39 // GetResult will be called when all data have been processed. 40 GetResult(evalCtx *AggEvaluateContext) types.Causet 41 42 // CreateContext creates a new AggEvaluateContext for the aggregation function. 43 CreateContext(sc *stmtctx.StatementContext) *AggEvaluateContext 44 45 // ResetContext resets the content of the evaluate context. 46 ResetContext(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext) 47 } 48 49 // NewDistAggFunc creates new Aggregate function for mock einsteindb. 50 func NewDistAggFunc(expr *fidelpb.Expr, fieldTps []*types.FieldType, sc *stmtctx.StatementContext) (Aggregation, error) { 51 args := make([]memex.Expression, 0, len(expr.Children)) 52 for _, child := range expr.Children { 53 arg, err := memex.PBToExpr(child, fieldTps, sc) 54 if err != nil { 55 return nil, err 56 } 57 args = append(args, arg) 58 } 59 switch expr.Tp { 60 case fidelpb.ExprType_Sum: 61 return &sumFunction{aggFunction: newAggFunc(ast.AggFuncSum, args, false)}, nil 62 case fidelpb.ExprType_Count: 63 return &countFunction{aggFunction: newAggFunc(ast.AggFuncCount, args, false)}, nil 64 case fidelpb.ExprType_Avg: 65 return &avgFunction{aggFunction: newAggFunc(ast.AggFuncAvg, args, false)}, nil 66 case fidelpb.ExprType_GroupConcat: 67 return &concatFunction{aggFunction: newAggFunc(ast.AggFuncGroupConcat, args, false)}, nil 68 case fidelpb.ExprType_Max: 69 return &maxMinFunction{aggFunction: newAggFunc(ast.AggFuncMax, args, false), isMax: true}, nil 70 case fidelpb.ExprType_Min: 71 return &maxMinFunction{aggFunction: newAggFunc(ast.AggFuncMin, args, false)}, nil 72 case fidelpb.ExprType_First: 73 return &firstEventFunction{aggFunction: newAggFunc(ast.AggFuncFirstEvent, args, false)}, nil 74 case fidelpb.ExprType_Agg_BitOr: 75 return &bitOrFunction{aggFunction: newAggFunc(ast.AggFuncBitOr, args, false)}, nil 76 case fidelpb.ExprType_Agg_BitXor: 77 return &bitXorFunction{aggFunction: newAggFunc(ast.AggFuncBitXor, args, false)}, nil 78 case fidelpb.ExprType_Agg_BitAnd: 79 return &bitAndFunction{aggFunction: newAggFunc(ast.AggFuncBitAnd, args, false)}, nil 80 } 81 return nil, errors.Errorf("Unknown aggregate function type %v", expr.Tp) 82 } 83 84 // AggEvaluateContext is used to causetstore intermediate result when calculating aggregate functions. 85 type AggEvaluateContext struct { 86 DistinctChecker *distinctChecker 87 Count int64 88 Value types.Causet 89 Buffer *bytes.Buffer // Buffer is used for group_concat. 90 GotFirstEvent bool // It will check if the agg has met the first event key. 91 } 92 93 // AggFunctionMode stands for the aggregation function's mode. 94 type AggFunctionMode int 95 96 // |-----------------|--------------|--------------| 97 // | AggFunctionMode | input | output | 98 // |-----------------|--------------|--------------| 99 // | CompleteMode | origin data | final result | 100 // | FinalMode | partial data | final result | 101 // | Partial1Mode | origin data | partial data | 102 // | Partial2Mode | partial data | partial data | 103 // | DedupMode | origin data | origin data | 104 // |-----------------|--------------|--------------| 105 const ( 106 CompleteMode AggFunctionMode = iota 107 FinalMode 108 Partial1Mode 109 Partial2Mode 110 DedupMode 111 ) 112 113 type aggFunction struct { 114 *AggFuncDesc 115 } 116 117 func newAggFunc(funcName string, args []memex.Expression, hasDistinct bool) aggFunction { 118 agg := &AggFuncDesc{HasDistinct: hasDistinct} 119 agg.Name = funcName 120 agg.Args = args 121 return aggFunction{AggFuncDesc: agg} 122 } 123 124 // CreateContext implements Aggregation interface. 125 func (af *aggFunction) CreateContext(sc *stmtctx.StatementContext) *AggEvaluateContext { 126 evalCtx := &AggEvaluateContext{} 127 if af.HasDistinct { 128 evalCtx.DistinctChecker = createDistinctChecker(sc) 129 } 130 return evalCtx 131 } 132 133 func (af *aggFunction) ResetContext(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext) { 134 if af.HasDistinct { 135 evalCtx.DistinctChecker = createDistinctChecker(sc) 136 } 137 evalCtx.Value.SetNull() 138 } 139 140 func (af *aggFunction) uFIDelateSum(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext, event chunk.Event) error { 141 a := af.Args[0] 142 value, err := a.Eval(event) 143 if err != nil { 144 return err 145 } 146 if value.IsNull() { 147 return nil 148 } 149 if af.HasDistinct { 150 d, err1 := evalCtx.DistinctChecker.Check([]types.Causet{value}) 151 if err1 != nil { 152 return err1 153 } 154 if !d { 155 return nil 156 } 157 } 158 evalCtx.Value, err = calculateSum(sc, evalCtx.Value, value) 159 if err != nil { 160 return err 161 } 162 evalCtx.Count++ 163 return nil 164 } 165 166 // NeedCount indicates whether the aggregate function should record count. 167 func NeedCount(name string) bool { 168 return name == ast.AggFuncCount || name == ast.AggFuncAvg 169 } 170 171 // NeedValue indicates whether the aggregate function should record value. 172 func NeedValue(name string) bool { 173 switch name { 174 case ast.AggFuncSum, ast.AggFuncAvg, ast.AggFuncFirstEvent, ast.AggFuncMax, ast.AggFuncMin, 175 ast.AggFuncGroupConcat, ast.AggFuncBitOr, ast.AggFuncBitAnd, ast.AggFuncBitXor: 176 return true 177 default: 178 return false 179 } 180 } 181 182 // IsAllFirstEvent checks whether functions in `aggFuncs` are all FirstEvent. 183 func IsAllFirstEvent(aggFuncs []*AggFuncDesc) bool { 184 for _, fun := range aggFuncs { 185 if fun.Name != ast.AggFuncFirstEvent { 186 return false 187 } 188 } 189 return true 190 } 191 192 // CheckAggPushDown checks whether an agg function can be pushed to storage. 193 func CheckAggPushDown(aggFunc *AggFuncDesc, storeType ekv.StoreType) bool { 194 if len(aggFunc.OrderByItems) > 0 { 195 return false 196 } 197 ret := true 198 switch storeType { 199 case ekv.TiFlash: 200 ret = CheckAggPushFlash(aggFunc) 201 } 202 if ret { 203 ret = memex.IsPushDownEnabled(strings.ToLower(aggFunc.Name), storeType) 204 } 205 return ret 206 } 207 208 // CheckAggPushFlash checks whether an agg function can be pushed to flash storage. 209 func CheckAggPushFlash(aggFunc *AggFuncDesc) bool { 210 switch aggFunc.Name { 211 case ast.AggFuncSum, ast.AggFuncCount, ast.AggFuncMin, ast.AggFuncMax, ast.AggFuncAvg, ast.AggFuncFirstEvent, ast.AggFuncApproxCountDistinct: 212 return true 213 } 214 return false 215 }