github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/partition_prune.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"strings"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    21  	"github.com/matrixorigin/matrixone/pkg/container/types"
    22  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    23  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    24  	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    26  )
    27  
    28  func (builder *QueryBuilder) partitionPrune(nodeID int32) {
    29  	node := builder.qry.Nodes[nodeID]
    30  	for _, childID := range node.Children {
    31  		builder.partitionPrune(childID)
    32  	}
    33  
    34  	switch node.NodeType {
    35  	case plan.Node_TABLE_SCAN, plan.Node_MATERIAL_SCAN, plan.Node_EXTERNAL_SCAN:
    36  		if node.TableDef.GetPartition() != nil && len(node.FilterList) != 0 {
    37  			partitionByDef := node.TableDef.Partition
    38  			switch partitionByDef.Type {
    39  			case plan.PartitionType_KEY, plan.PartitionType_LINEAR_KEY, plan.PartitionType_HASH, plan.PartitionType_LINEAR_HASH:
    40  				pruner := &KeyHashPartitionPruner{
    41  					node:    node,
    42  					process: builder.compCtx.GetProcess(),
    43  				}
    44  				pruner.init()
    45  				pruner.prune()
    46  			case plan.PartitionType_LIST:
    47  				// XXX unimplement
    48  			case plan.PartitionType_LIST_COLUMNS:
    49  				// XXX unimplement
    50  			case plan.PartitionType_RANGE:
    51  				// XXX unimplement
    52  			case plan.PartitionType_RANGE_COLUMNS:
    53  				// XXX unimplement
    54  			}
    55  		}
    56  	}
    57  }
    58  
    59  // KEY and HASH Partition Pruner
    60  type KeyHashPartitionPruner struct {
    61  	partitionKeysMap map[string]int
    62  	partitionByDef   *plan.PartitionByDef
    63  	node             *Node
    64  	process          *process.Process
    65  }
    66  
    67  type PartitionPruneResult struct {
    68  	usedPartitions map[int32]bool
    69  	isUnablePrune  bool
    70  	needPushUp     bool
    71  }
    72  
    73  func (p *KeyHashPartitionPruner) init() {
    74  	partitionByDef := p.node.TableDef.Partition
    75  	p.partitionByDef = partitionByDef
    76  	p.partitionKeysMap = make(map[string]int)
    77  
    78  	switch partitionByDef.Type {
    79  	case plan.PartitionType_KEY, plan.PartitionType_LINEAR_KEY:
    80  		for _, partitionCol := range partitionByDef.PartitionColumns.PartitionColumns {
    81  			if _, ok := p.partitionKeysMap[partitionCol]; !ok {
    82  				p.partitionKeysMap[partitionCol] = 1
    83  			}
    84  		}
    85  	case plan.PartitionType_HASH, plan.PartitionType_LINEAR_HASH:
    86  		extractColumnsFromExpression(partitionByDef.PartitionExpr.Expr, p.partitionKeysMap)
    87  	}
    88  }
    89  
    90  // detachAndPrune Detach of filter conditions and partition prune
    91  func (p *KeyHashPartitionPruner) detachAndPrune() *PartitionPruneResult {
    92  	if len(p.node.FilterList) == 1 {
    93  		if exprF, ok := p.node.FilterList[0].Expr.(*plan.Expr_F); ok && exprF.F.Func.ObjName == "or" {
    94  			return p.detachDNFCondAndBuildPrune(p.node.FilterList[0])
    95  		}
    96  	}
    97  	return p.detachCNFCondAndBuildPrune(p.node.FilterList)
    98  }
    99  
   100  func (p *KeyHashPartitionPruner) prune() bool {
   101  	pruneResult := p.detachAndPrune()
   102  	if pruneResult.isUnablePrune {
   103  		return false
   104  	}
   105  
   106  	p.node.PartitionPrune = &plan.PartitionPrune{
   107  		IsPruned:           true,
   108  		SelectedPartitions: make([]*plan.PartitionItem, 0, len(pruneResult.usedPartitions)),
   109  	}
   110  
   111  	for pid := range pruneResult.usedPartitions {
   112  		partitionItem := p.partitionByDef.Partitions[pid]
   113  		partition := &plan.PartitionItem{
   114  			PartitionName:      partitionItem.PartitionName,
   115  			OrdinalPosition:    partitionItem.OrdinalPosition,
   116  			Description:        partitionItem.Description,
   117  			Comment:            partitionItem.Comment,
   118  			LessThan:           DeepCopyExprList(partitionItem.LessThan),
   119  			InValues:           DeepCopyExprList(partitionItem.InValues),
   120  			PartitionTableName: partitionItem.PartitionTableName,
   121  		}
   122  		p.node.PartitionPrune.SelectedPartitions = append(p.node.PartitionPrune.SelectedPartitions, partition)
   123  	}
   124  	return true
   125  }
   126  
   127  func (p *KeyHashPartitionPruner) detachDNFCondAndBuildPrune(orExpr *plan.Expr) *PartitionPruneResult {
   128  	unablePruneResult := &PartitionPruneResult{
   129  		isUnablePrune: true,
   130  	}
   131  
   132  	// split disjunctive expression
   133  	dnfItems := SplitDNFItems(orExpr)
   134  	if isAllColEqualConstExpr(dnfItems) {
   135  		if len(p.partitionKeysMap) == 1 {
   136  			usedPartitions := make(map[int32]bool)
   137  			for _, expr := range dnfItems {
   138  				// 1. extract all ColRef equals const value from expression
   139  				colEqValMap := make(map[string]*plan.Expr)
   140  				extractColEqValFromEqualExpr(expr, colEqValMap)
   141  
   142  				// 2. Check if all column equivalence expressions contain all partition keys
   143  				if !exprColsIncludePartitionKeys(p.partitionKeysMap, colEqValMap) {
   144  					return unablePruneResult
   145  				}
   146  
   147  				if ok, pidx := p.getUsedPartition(colEqValMap); ok {
   148  					// if pidx=-1, it means that no existing partitions can be selected
   149  					if pidx != -1 {
   150  						usedPartitions[pidx] = true
   151  					}
   152  				} else {
   153  					return unablePruneResult
   154  				}
   155  			}
   156  			return &PartitionPruneResult{
   157  				isUnablePrune:  false,
   158  				usedPartitions: usedPartitions,
   159  			}
   160  		} else {
   161  			return unablePruneResult
   162  		}
   163  	} else {
   164  		hitPartitions := make(map[int32]bool)
   165  		for i := range dnfItems {
   166  			if isLogicExpr(dnfItems[i], "and") {
   167  				exprs := SplitCNFItems(dnfItems[i])
   168  				tmp := p.detachCNFCondAndBuildPrune(exprs)
   169  				if tmp.needPushUp || tmp.isUnablePrune {
   170  					return unablePruneResult
   171  				} else {
   172  					hitPartitions = union(hitPartitions, tmp.usedPartitions)
   173  				}
   174  			} else if isExprColRefEqualConst(dnfItems[i]) {
   175  				// 2. extract all colRef to const value from filters
   176  				colEqValMap := make(map[string]*plan.Expr)
   177  				extractColEqValFromEqualExpr(dnfItems[i], colEqValMap)
   178  				if !exprColsIncludePartitionKeys(p.partitionKeysMap, colEqValMap) {
   179  					return unablePruneResult
   180  				}
   181  
   182  				if ok, pidx := p.getUsedPartition(colEqValMap); ok {
   183  					// if pidx=-1, it means that no existing partitions can be selected
   184  					if pidx != -1 {
   185  						hitPartitions[pidx] = true
   186  					}
   187  				} else {
   188  					return unablePruneResult
   189  				}
   190  			} else {
   191  				return unablePruneResult
   192  			}
   193  		}
   194  		return &PartitionPruneResult{
   195  			usedPartitions: hitPartitions,
   196  			isUnablePrune:  false,
   197  		}
   198  	}
   199  }
   200  
   201  func (p *KeyHashPartitionPruner) detachCNFCondAndBuildPrune(conditions []*Expr) *PartitionPruneResult {
   202  	if isAllSimpleExpr(conditions) {
   203  		// 1. Collect equivalent expressions
   204  		if ok, colEqValMap := extractColEqValFromExprs(conditions, p.partitionKeysMap); ok {
   205  			return p.buildPruneResult(colEqValMap)
   206  		} else {
   207  			return &PartitionPruneResult{
   208  				isUnablePrune: true,
   209  			}
   210  		}
   211  	} else if isAllLogicExpr(conditions, "or") {
   212  		return p.buildPruneResultForOrConditions(conditions)
   213  	} else {
   214  		return &PartitionPruneResult{
   215  			isUnablePrune: true,
   216  		}
   217  	}
   218  }
   219  
   220  // buildPruneResult Get hit partitions based on the set of equivalent expressions
   221  func (p *KeyHashPartitionPruner) buildPruneResult(colEqValMap map[string]*plan.Expr) *PartitionPruneResult {
   222  	// Check if the conditions meet the partitioning key
   223  	if len(colEqValMap) != len(p.partitionKeysMap) {
   224  		return &PartitionPruneResult{
   225  			isUnablePrune: true,
   226  			needPushUp:    true,
   227  		}
   228  	}
   229  
   230  	if ok, pid := p.getUsedPartition(colEqValMap); ok {
   231  		hitPartitions := make(map[int32]bool)
   232  		if pid != -1 {
   233  			hitPartitions[pid] = true
   234  		}
   235  		result := &PartitionPruneResult{
   236  			isUnablePrune:  false,
   237  			usedPartitions: hitPartitions,
   238  		}
   239  		return result
   240  	} else {
   241  		return &PartitionPruneResult{
   242  			isUnablePrune: true,
   243  		}
   244  	}
   245  }
   246  
   247  // buildPruneResultForOrConditions Get hit partitions based on the set of disjunction expressions
   248  func (p *KeyHashPartitionPruner) buildPruneResultForOrConditions(conditions []*Expr) *PartitionPruneResult {
   249  	hitPartitions := make(map[int32]bool)
   250  	for i, cond := range conditions {
   251  		tmp := p.detachDNFCondAndBuildPrune(cond)
   252  		if tmp.isUnablePrune {
   253  			return &PartitionPruneResult{
   254  				isUnablePrune: true,
   255  			}
   256  		}
   257  		if i == 0 {
   258  			hitPartitions = tmp.usedPartitions
   259  		} else {
   260  			hitPartitions = intersection(hitPartitions, tmp.usedPartitions)
   261  		}
   262  	}
   263  	return &PartitionPruneResult{
   264  		usedPartitions: hitPartitions,
   265  		isUnablePrune:  false,
   266  	}
   267  }
   268  
   269  // getUsedPartition Calculate the partition based on the constant expression of the partition key column
   270  func (p *KeyHashPartitionPruner) getUsedPartition(cnfColEqVal map[string]*plan.Expr) (bool, int32) {
   271  	// 1.evaluate the partition expr where the colRef assigned with const
   272  	inputBat := batch.NewWithSize(len(p.node.TableDef.GetCols()))
   273  	inputBat.SetRowCount(1)
   274  	defer inputBat.Clean(p.process.Mp())
   275  
   276  	for i, colDef := range p.node.TableDef.GetCols() {
   277  		if valueExpr, ok := cnfColEqVal[colDef.GetName()]; ok {
   278  			colVec, err := colexec.EvalExpressionOnce(p.process, valueExpr, []*batch.Batch{batch.EmptyForConstFoldBatch})
   279  			if err != nil {
   280  				return false, -1
   281  			}
   282  			inputBat.SetVector(int32(i), colVec)
   283  		} else {
   284  			typ := types.New(types.T(colDef.Typ.Id), colDef.Typ.Width, colDef.Typ.Scale)
   285  			colVec := vector.NewConstNull(typ, 1, p.process.Mp())
   286  			inputBat.SetVector(int32(i), colVec)
   287  		}
   288  	}
   289  
   290  	// 2. calculate partition expression
   291  	resVec, err := colexec.EvalExpressionOnce(p.process, p.partitionByDef.PartitionExpression, []*batch.Batch{inputBat})
   292  	if err != nil {
   293  		return false, -1
   294  	}
   295  	defer resVec.Free(p.process.Mp())
   296  
   297  	// 3. prune the partition
   298  	if resVec.IsConstNull() {
   299  		return false, -1
   300  	} else {
   301  		return true, vector.MustFixedCol[int32](resVec)[0]
   302  	}
   303  }
   304  
   305  // intersection Finding the Intersection of Two Map[int32]bool Sets
   306  func intersection(left, right map[int32]bool) map[int32]bool {
   307  	result := make(map[int32]bool)
   308  	for key, value := range left {
   309  		if _, ok := right[key]; ok {
   310  			result[key] = value
   311  		}
   312  	}
   313  	return result
   314  }
   315  
   316  // union Finding the Union of Two Map[int32]bool Sets
   317  func union(left, right map[int32]bool) map[int32]bool {
   318  	result := make(map[int32]bool)
   319  	for key, value := range left {
   320  		result[key] = value
   321  	}
   322  	for key, value := range right {
   323  		result[key] = value
   324  	}
   325  	return result
   326  }
   327  
   328  // SplitCNFItems splits CNF items.
   329  // CNF means conjunctive normal form, such as: "a and b and c".
   330  func SplitCNFItems(onExpr *Expr) []*Expr {
   331  	return splitNormalFormItems(onExpr, "and")
   332  }
   333  
   334  // SplitDNFItems splits DNF items.
   335  // DNF means disjunctive normal form, such as: "a or b or c".
   336  func SplitDNFItems(onExpr *Expr) []*Expr {
   337  	return splitNormalFormItems(onExpr, "or")
   338  }
   339  
   340  // splitNormalFormItems split CNF(conjunctive normal form) like "a and b and c", or DNF(disjunctive normal form) like "a or b or c"
   341  func splitNormalFormItems(onExpr *Expr, funcName string) []*Expr {
   342  	// nolint: revive
   343  	switch v := onExpr.Expr.(type) {
   344  	case *plan.Expr_F:
   345  		if v.F.Func.ObjName == funcName {
   346  			var ret []*Expr
   347  			for _, arg := range v.F.GetArgs() {
   348  				ret = append(ret, splitNormalFormItems(arg, funcName)...)
   349  			}
   350  			return ret
   351  		}
   352  	}
   353  	return []*Expr{onExpr}
   354  }
   355  
   356  // ----------------------------------------------------------------------------------------------------------------------
   357  // extract column equivalent pairs from a equality comparison expression
   358  func extractColEqValFromEqualExpr(expr *plan.Expr, colEqValMap map[string]*plan.Expr) {
   359  	switch exprImpl := expr.Expr.(type) {
   360  	case *plan.Expr_F:
   361  		if exprImpl.F.Func.ObjName == "=" {
   362  			if isColExpr(exprImpl.F.Args[0]) {
   363  				exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col)
   364  				colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[1]
   365  			} else if isColExpr(exprImpl.F.Args[1]) {
   366  				exprCol := exprImpl.F.Args[1].Expr.(*plan.Expr_Col)
   367  				colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[0]
   368  			}
   369  		}
   370  	}
   371  }
   372  
   373  // extract column equivalent pairs from some expressions
   374  func extractColEqValFromExprs(cnfExprs []*Expr, partKeysMap map[string]int) (bool, map[string]*plan.Expr) {
   375  	colEqValMap := make(map[string]*plan.Expr)
   376  	for i := range cnfExprs {
   377  		switch exprImpl := cnfExprs[i].Expr.(type) {
   378  		case *plan.Expr_F:
   379  			if exprImpl.F.Func.ObjName == "=" {
   380  				if isColExpr(exprImpl.F.Args[0]) && isConstExpr(exprImpl.F.Args[1]) {
   381  					exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col)
   382  					if _, ok := partKeysMap[exprCol.Col.Name]; ok {
   383  						colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[1]
   384  					}
   385  				} else if isConstExpr(exprImpl.F.Args[0]) && isColExpr(exprImpl.F.Args[1]) {
   386  					exprCol := exprImpl.F.Args[0].Expr.(*plan.Expr_Col)
   387  					if _, ok := partKeysMap[exprCol.Col.Name]; ok {
   388  						colEqValMap[exprCol.Col.Name] = exprImpl.F.Args[0]
   389  					}
   390  				} else {
   391  					continue
   392  				}
   393  			} else {
   394  				if checkExprContainPartitionKey(cnfExprs[i], partKeysMap) {
   395  					return false, nil
   396  				}
   397  			}
   398  		default:
   399  			if checkExprContainPartitionKey(cnfExprs[i], partKeysMap) {
   400  				return false, nil
   401  			}
   402  		}
   403  	}
   404  	return true, colEqValMap
   405  }
   406  
   407  // Extract columns used in partition expressions
   408  func extractColumnsFromExpression(expr *plan.Expr, usedColumns map[string]int) {
   409  	switch e := expr.Expr.(type) {
   410  	case *plan.Expr_Col:
   411  		if v, ok := usedColumns[e.Col.Name]; ok {
   412  			usedColumns[e.Col.Name] = v + 1
   413  		} else {
   414  			usedColumns[e.Col.Name] = 1
   415  		}
   416  	case *plan.Expr_F:
   417  		for _, args := range e.F.Args {
   418  			extractColumnsFromExpression(args, usedColumns)
   419  		}
   420  	case *plan.Expr_List:
   421  		for _, exprl := range e.List.List {
   422  			extractColumnsFromExpression(exprl, usedColumns)
   423  		}
   424  	default:
   425  		return
   426  	}
   427  }
   428  
   429  // Check if all column equivalence expressions contain all partition keys
   430  func exprColsIncludePartitionKeys(partitionKeys map[string]int, exprCols map[string]*plan.Expr) bool {
   431  	for key := range partitionKeys {
   432  		if !keyIsInExprCols(key, exprCols) {
   433  			return false
   434  		}
   435  	}
   436  	return true
   437  }
   438  
   439  func keyIsInExprCols(key string, exprCols map[string]*plan.Expr) bool {
   440  	for c1 := range exprCols {
   441  		if strings.EqualFold(key, c1) {
   442  			return true
   443  		}
   444  	}
   445  	return false
   446  }
   447  
   448  func isAllSimpleExpr(exprs []*Expr) bool {
   449  	for _, expr := range exprs {
   450  		if !isSimpleExpr(expr) {
   451  			return false
   452  		}
   453  	}
   454  	return true
   455  }
   456  
   457  // checkExprContainPartitionKey Check if the expression contains partitioning keys
   458  func checkExprContainPartitionKey(expr *Expr, partitionKeys map[string]int) bool {
   459  	switch v := expr.Expr.(type) {
   460  	case *plan.Expr_Col:
   461  		if _, ok := partitionKeys[v.Col.Name]; ok {
   462  			return true
   463  		}
   464  	case *plan.Expr_F:
   465  		for _, arg := range v.F.GetArgs() {
   466  			if checkExprContainPartitionKey(arg, partitionKeys) {
   467  				return true
   468  			}
   469  		}
   470  	}
   471  	return false
   472  }
   473  
   474  func isAllColEqualConstExpr(exprs []*Expr) bool {
   475  	for _, expr := range exprs {
   476  		if !isExprColRefEqualConst(expr) {
   477  			return false
   478  		}
   479  	}
   480  	return true
   481  }
   482  
   483  func isAllLogicExpr(exprs []*Expr, funcName string) bool {
   484  	for _, expr := range exprs {
   485  		if !isLogicExpr(expr, funcName) {
   486  			return false
   487  		}
   488  	}
   489  	return true
   490  }
   491  
   492  func isSimpleExpr(expr *Expr) bool {
   493  	switch exprImpl := expr.Expr.(type) {
   494  	case *plan.Expr_F:
   495  		for _, arg := range exprImpl.F.Args {
   496  			if !isFactorExpr(arg) {
   497  				return false
   498  			}
   499  		}
   500  	}
   501  	return true
   502  }
   503  
   504  func isFactorExpr(expr *Expr) bool {
   505  	switch exprImpl := expr.Expr.(type) {
   506  	case *plan.Expr_Col, *plan.Expr_Lit, *plan.Expr_Max, *plan.Expr_T:
   507  		return true
   508  	case *plan.Expr_F:
   509  		if exprImpl.F.Func.ObjName == "cast" {
   510  			return isFactorExpr(exprImpl.F.Args[0])
   511  		} else {
   512  			return false
   513  		}
   514  	default:
   515  		return false
   516  	}
   517  }
   518  
   519  func isLogicExpr(expr *Expr, funcName string) bool {
   520  	switch exprImpl := expr.Expr.(type) {
   521  	case *plan.Expr_F:
   522  		if exprImpl.F.Func.ObjName == funcName {
   523  			return true
   524  		}
   525  	}
   526  	return false
   527  }
   528  
   529  func isExprColRefEqualConst(expr *plan.Expr) bool {
   530  	switch exprImpl := expr.Expr.(type) {
   531  	case *plan.Expr_F:
   532  		if exprImpl.F.Func.ObjName == "=" {
   533  			if isColExpr(exprImpl.F.Args[0]) && isConstExpr(exprImpl.F.Args[1]) ||
   534  				isConstExpr(exprImpl.F.Args[0]) && isColExpr(exprImpl.F.Args[1]) {
   535  				return true
   536  			}
   537  		}
   538  	}
   539  	return false
   540  }
   541  
   542  func isColExpr(expr *plan.Expr) bool {
   543  	switch expr.Expr.(type) {
   544  	case *plan.Expr_Col:
   545  		return true
   546  	}
   547  	return false
   548  }
   549  
   550  func isConstExpr(expr *plan.Expr) bool {
   551  	switch expr.Expr.(type) {
   552  	case *plan.Expr_Lit:
   553  		return true
   554  	}
   555  	return false
   556  }