github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/memristed/memex/aggregation/aggregation.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package aggregation
    15  
    16  import (
    17  	"bytes"
    18  	"strings"
    19  
    20  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    21  	"github.com/whtcorpsinc/errors"
    22  	"github.com/whtcorpsinc/fidelpb/go-fidelpb"
    23  	"github.com/whtcorpsinc/milevadb/ekv"
    24  	"github.com/whtcorpsinc/milevadb/memex"
    25  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    26  	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
    27  	"github.com/whtcorpsinc/milevadb/types"
    28  )
    29  
// Aggregation stands for aggregate functions.
// NewDistAggFunc returns a value of this interface for every aggregate it supports.
type Aggregation interface {
	// UFIDelate is called during execution with the evaluate context, the
	// statement context and a single event. It returns any error encountered.
	UFIDelate(evalCtx *AggEvaluateContext, sc *stmtctx.StatementContext, event chunk.Event) error

	// GetPartialResult will be called by interlock to get partial results. For the avg function,
	// the partial results contain the sum and count values at the same time.
	GetPartialResult(evalCtx *AggEvaluateContext) []types.Causet

	// GetResult will be called when all data have been processed.
	GetResult(evalCtx *AggEvaluateContext) types.Causet

	// CreateContext creates a new AggEvaluateContext for the aggregation function.
	CreateContext(sc *stmtctx.StatementContext) *AggEvaluateContext

	// ResetContext resets the content of the evaluate context.
	ResetContext(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext)
}
    48  
    49  // NewDistAggFunc creates new Aggregate function for mock einsteindb.
    50  func NewDistAggFunc(expr *fidelpb.Expr, fieldTps []*types.FieldType, sc *stmtctx.StatementContext) (Aggregation, error) {
    51  	args := make([]memex.Expression, 0, len(expr.Children))
    52  	for _, child := range expr.Children {
    53  		arg, err := memex.PBToExpr(child, fieldTps, sc)
    54  		if err != nil {
    55  			return nil, err
    56  		}
    57  		args = append(args, arg)
    58  	}
    59  	switch expr.Tp {
    60  	case fidelpb.ExprType_Sum:
    61  		return &sumFunction{aggFunction: newAggFunc(ast.AggFuncSum, args, false)}, nil
    62  	case fidelpb.ExprType_Count:
    63  		return &countFunction{aggFunction: newAggFunc(ast.AggFuncCount, args, false)}, nil
    64  	case fidelpb.ExprType_Avg:
    65  		return &avgFunction{aggFunction: newAggFunc(ast.AggFuncAvg, args, false)}, nil
    66  	case fidelpb.ExprType_GroupConcat:
    67  		return &concatFunction{aggFunction: newAggFunc(ast.AggFuncGroupConcat, args, false)}, nil
    68  	case fidelpb.ExprType_Max:
    69  		return &maxMinFunction{aggFunction: newAggFunc(ast.AggFuncMax, args, false), isMax: true}, nil
    70  	case fidelpb.ExprType_Min:
    71  		return &maxMinFunction{aggFunction: newAggFunc(ast.AggFuncMin, args, false)}, nil
    72  	case fidelpb.ExprType_First:
    73  		return &firstEventFunction{aggFunction: newAggFunc(ast.AggFuncFirstEvent, args, false)}, nil
    74  	case fidelpb.ExprType_Agg_BitOr:
    75  		return &bitOrFunction{aggFunction: newAggFunc(ast.AggFuncBitOr, args, false)}, nil
    76  	case fidelpb.ExprType_Agg_BitXor:
    77  		return &bitXorFunction{aggFunction: newAggFunc(ast.AggFuncBitXor, args, false)}, nil
    78  	case fidelpb.ExprType_Agg_BitAnd:
    79  		return &bitAndFunction{aggFunction: newAggFunc(ast.AggFuncBitAnd, args, false)}, nil
    80  	}
    81  	return nil, errors.Errorf("Unknown aggregate function type %v", expr.Tp)
    82  }
    83  
// AggEvaluateContext is used to store intermediate results when calculating aggregate functions.
type AggEvaluateContext struct {
	// DistinctChecker is only populated by CreateContext/ResetContext when the
	// aggregate has HasDistinct set; otherwise it is left nil.
	DistinctChecker *distinctChecker
	// Count is incremented by uFIDelateSum once for every value it accumulates.
	Count           int64
	// Value holds the running value; uFIDelateSum keeps the sum here and
	// ResetContext sets it back to NULL.
	Value           types.Causet
	Buffer          *bytes.Buffer // Buffer is used for group_concat.
	GotFirstEvent   bool          // It will check if the agg has met the first event key.
}
    92  
// AggFunctionMode stands for the aggregation function's mode.
// A mode is characterised by the kind of data it consumes and the kind of
// data it produces, as summarised in the table on the constants below.
type AggFunctionMode int

// |-----------------|--------------|--------------|
// | AggFunctionMode | input        | output       |
// |-----------------|--------------|--------------|
// | CompleteMode    | origin data  | final result |
// | FinalMode       | partial data | final result |
// | Partial1Mode    | origin data  | partial data |
// | Partial2Mode    | partial data | partial data |
// | DedupMode       | origin data  | origin data  |
// |-----------------|--------------|--------------|
const (
	// CompleteMode consumes origin data and produces the final result.
	CompleteMode AggFunctionMode = iota
	// FinalMode consumes partial data and produces the final result.
	FinalMode
	// Partial1Mode consumes origin data and produces partial data.
	Partial1Mode
	// Partial2Mode consumes partial data and produces partial data.
	Partial2Mode
	// DedupMode consumes origin data and produces origin data.
	DedupMode
)
   112  
// aggFunction is the common base that NewDistAggFunc embeds in each concrete
// aggregate. It embeds *AggFuncDesc, so Name, Args and HasDistinct are reached
// through it, and it carries the shared CreateContext, ResetContext and
// uFIDelateSum methods.
type aggFunction struct {
	*AggFuncDesc
}
   116  
   117  func newAggFunc(funcName string, args []memex.Expression, hasDistinct bool) aggFunction {
   118  	agg := &AggFuncDesc{HasDistinct: hasDistinct}
   119  	agg.Name = funcName
   120  	agg.Args = args
   121  	return aggFunction{AggFuncDesc: agg}
   122  }
   123  
   124  // CreateContext implements Aggregation interface.
   125  func (af *aggFunction) CreateContext(sc *stmtctx.StatementContext) *AggEvaluateContext {
   126  	evalCtx := &AggEvaluateContext{}
   127  	if af.HasDistinct {
   128  		evalCtx.DistinctChecker = createDistinctChecker(sc)
   129  	}
   130  	return evalCtx
   131  }
   132  
   133  func (af *aggFunction) ResetContext(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext) {
   134  	if af.HasDistinct {
   135  		evalCtx.DistinctChecker = createDistinctChecker(sc)
   136  	}
   137  	evalCtx.Value.SetNull()
   138  }
   139  
   140  func (af *aggFunction) uFIDelateSum(sc *stmtctx.StatementContext, evalCtx *AggEvaluateContext, event chunk.Event) error {
   141  	a := af.Args[0]
   142  	value, err := a.Eval(event)
   143  	if err != nil {
   144  		return err
   145  	}
   146  	if value.IsNull() {
   147  		return nil
   148  	}
   149  	if af.HasDistinct {
   150  		d, err1 := evalCtx.DistinctChecker.Check([]types.Causet{value})
   151  		if err1 != nil {
   152  			return err1
   153  		}
   154  		if !d {
   155  			return nil
   156  		}
   157  	}
   158  	evalCtx.Value, err = calculateSum(sc, evalCtx.Value, value)
   159  	if err != nil {
   160  		return err
   161  	}
   162  	evalCtx.Count++
   163  	return nil
   164  }
   165  
   166  // NeedCount indicates whether the aggregate function should record count.
   167  func NeedCount(name string) bool {
   168  	return name == ast.AggFuncCount || name == ast.AggFuncAvg
   169  }
   170  
   171  // NeedValue indicates whether the aggregate function should record value.
   172  func NeedValue(name string) bool {
   173  	switch name {
   174  	case ast.AggFuncSum, ast.AggFuncAvg, ast.AggFuncFirstEvent, ast.AggFuncMax, ast.AggFuncMin,
   175  		ast.AggFuncGroupConcat, ast.AggFuncBitOr, ast.AggFuncBitAnd, ast.AggFuncBitXor:
   176  		return true
   177  	default:
   178  		return false
   179  	}
   180  }
   181  
   182  // IsAllFirstEvent checks whether functions in `aggFuncs` are all FirstEvent.
   183  func IsAllFirstEvent(aggFuncs []*AggFuncDesc) bool {
   184  	for _, fun := range aggFuncs {
   185  		if fun.Name != ast.AggFuncFirstEvent {
   186  			return false
   187  		}
   188  	}
   189  	return true
   190  }
   191  
   192  // CheckAggPushDown checks whether an agg function can be pushed to storage.
   193  func CheckAggPushDown(aggFunc *AggFuncDesc, storeType ekv.StoreType) bool {
   194  	if len(aggFunc.OrderByItems) > 0 {
   195  		return false
   196  	}
   197  	ret := true
   198  	switch storeType {
   199  	case ekv.TiFlash:
   200  		ret = CheckAggPushFlash(aggFunc)
   201  	}
   202  	if ret {
   203  		ret = memex.IsPushDownEnabled(strings.ToLower(aggFunc.Name), storeType)
   204  	}
   205  	return ret
   206  }
   207  
   208  // CheckAggPushFlash checks whether an agg function can be pushed to flash storage.
   209  func CheckAggPushFlash(aggFunc *AggFuncDesc) bool {
   210  	switch aggFunc.Name {
   211  	case ast.AggFuncSum, ast.AggFuncCount, ast.AggFuncMin, ast.AggFuncMax, ast.AggFuncAvg, ast.AggFuncFirstEvent, ast.AggFuncApproxCountDistinct:
   212  		return true
   213  	}
   214  	return false
   215  }