github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/opt_misc.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"strconv"
    19  	"strings"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    22  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    25  	"github.com/matrixorigin/matrixone/pkg/sql/plan/function"
    26  )
    27  
    28  func (builder *QueryBuilder) countColRefs(nodeID int32, colRefCnt map[[2]int32]int) {
    29  	node := builder.qry.Nodes[nodeID]
    30  
    31  	increaseRefCntForExprList(node.ProjectList, 1, colRefCnt)
    32  	increaseRefCntForExprList(node.OnList, 1, colRefCnt)
    33  	increaseRefCntForExprList(node.FilterList, 1, colRefCnt)
    34  	increaseRefCntForExprList(node.GroupBy, 1, colRefCnt)
    35  	increaseRefCntForExprList(node.GroupingSet, 1, colRefCnt)
    36  	increaseRefCntForExprList(node.AggList, 1, colRefCnt)
    37  	increaseRefCntForExprList(node.WinSpecList, 1, colRefCnt)
    38  	for i := range node.OrderBy {
    39  		increaseRefCnt(node.OrderBy[i].Expr, 1, colRefCnt)
    40  	}
    41  
    42  	for _, childID := range node.Children {
    43  		builder.countColRefs(childID, colRefCnt)
    44  	}
    45  }
    46  
    47  // removeSimpleProjections On top of each subquery or view it has a PROJECT node, which interrupts optimizer rules such as join order.
    48  func (builder *QueryBuilder) removeSimpleProjections(nodeID int32, parentType plan.Node_NodeType, flag bool, colRefCnt map[[2]int32]int) (int32, map[[2]int32]*plan.Expr) {
    49  	node := builder.qry.Nodes[nodeID]
    50  	if node.NodeType == plan.Node_SINK {
    51  		return builder.removeSimpleProjections(node.Children[0], plan.Node_UNKNOWN, flag, colRefCnt)
    52  	}
    53  	projMap := make(map[[2]int32]*plan.Expr)
    54  
    55  	switch node.NodeType {
    56  	case plan.Node_JOIN:
    57  		leftFlag := flag || node.JoinType == plan.Node_RIGHT || node.JoinType == plan.Node_OUTER
    58  		rightFlag := flag || node.JoinType == plan.Node_LEFT || node.JoinType == plan.Node_OUTER
    59  
    60  		newChildID, childProjMap := builder.removeSimpleProjections(node.Children[0], plan.Node_JOIN, leftFlag, colRefCnt)
    61  		node.Children[0] = newChildID
    62  		for ref, expr := range childProjMap {
    63  			projMap[ref] = expr
    64  		}
    65  
    66  		newChildID, childProjMap = builder.removeSimpleProjections(node.Children[1], plan.Node_JOIN, rightFlag, colRefCnt)
    67  		node.Children[1] = newChildID
    68  		for ref, expr := range childProjMap {
    69  			projMap[ref] = expr
    70  		}
    71  
    72  	case plan.Node_AGG, plan.Node_PROJECT, plan.Node_WINDOW, plan.Node_TIME_WINDOW, plan.Node_FILL:
    73  		for i, childID := range node.Children {
    74  			newChildID, childProjMap := builder.removeSimpleProjections(childID, node.NodeType, false, colRefCnt)
    75  			node.Children[i] = newChildID
    76  			for ref, expr := range childProjMap {
    77  				projMap[ref] = expr
    78  			}
    79  		}
    80  
    81  	default:
    82  		for i, childID := range node.Children {
    83  			newChildID, childProjMap := builder.removeSimpleProjections(childID, node.NodeType, flag, colRefCnt)
    84  			node.Children[i] = newChildID
    85  			for ref, expr := range childProjMap {
    86  				projMap[ref] = expr
    87  			}
    88  		}
    89  	}
    90  
    91  	replaceColumnsForNode(node, projMap)
    92  
    93  	if builder.canRemoveProject(parentType, node) {
    94  		allColRef := true
    95  		tag := node.BindingTags[0]
    96  		for i, proj := range node.ProjectList {
    97  			if flag || colRefCnt[[2]int32{tag, int32(i)}] > 1 {
    98  				if proj.GetCol() == nil && (proj.GetLit() == nil || flag) {
    99  					allColRef = false
   100  					break
   101  				}
   102  			}
   103  		}
   104  
   105  		if allColRef {
   106  			tag := node.BindingTags[0]
   107  			for i, proj := range node.ProjectList {
   108  				projMap[[2]int32{tag, int32(i)}] = proj
   109  			}
   110  
   111  			nodeID = node.Children[0]
   112  		}
   113  	}
   114  
   115  	return nodeID, projMap
   116  }
   117  
   118  func increaseRefCntForExprList(exprs []*plan.Expr, inc int, colRefCnt map[[2]int32]int) {
   119  	for _, expr := range exprs {
   120  		increaseRefCnt(expr, inc, colRefCnt)
   121  	}
   122  }
   123  
   124  // FIXME: We should remove PROJECT node for more cases, but keep them now to avoid intricate issues.
   125  func (builder *QueryBuilder) canRemoveProject(parentType plan.Node_NodeType, node *plan.Node) bool {
   126  	if node.NodeType != plan.Node_PROJECT || node.Limit != nil || node.Offset != nil {
   127  		return false
   128  	}
   129  
   130  	if parentType == plan.Node_DISTINCT || parentType == plan.Node_UNKNOWN {
   131  		return false
   132  	}
   133  	if parentType == plan.Node_UNION || parentType == plan.Node_UNION_ALL {
   134  		return false
   135  	}
   136  	if parentType == plan.Node_MINUS || parentType == plan.Node_MINUS_ALL {
   137  		return false
   138  	}
   139  	if parentType == plan.Node_INTERSECT || parentType == plan.Node_INTERSECT_ALL {
   140  		return false
   141  	}
   142  	if parentType == plan.Node_FUNCTION_SCAN || parentType == plan.Node_EXTERNAL_FUNCTION {
   143  		return false
   144  	}
   145  	if parentType == plan.Node_DELETE {
   146  		return false
   147  	}
   148  	if parentType == plan.Node_INSERT || parentType == plan.Node_PRE_INSERT || parentType == plan.Node_PRE_INSERT_UK || parentType == plan.Node_PRE_INSERT_SK {
   149  		return false
   150  	}
   151  
   152  	childType := builder.qry.Nodes[node.Children[0]].NodeType
   153  	if childType == plan.Node_VALUE_SCAN || childType == plan.Node_EXTERNAL_SCAN {
   154  		return false
   155  	}
   156  	if childType == plan.Node_FUNCTION_SCAN || childType == plan.Node_EXTERNAL_FUNCTION {
   157  		return false
   158  	}
   159  
   160  	return true
   161  }
   162  
   163  func replaceColumnsForNode(node *plan.Node, projMap map[[2]int32]*plan.Expr) {
   164  	replaceColumnsForExprList(node.ProjectList, projMap)
   165  	replaceColumnsForExprList(node.OnList, projMap)
   166  	replaceColumnsForExprList(node.FilterList, projMap)
   167  	replaceColumnsForExprList(node.GroupBy, projMap)
   168  	replaceColumnsForExprList(node.GroupingSet, projMap)
   169  	replaceColumnsForExprList(node.AggList, projMap)
   170  	replaceColumnsForExprList(node.WinSpecList, projMap)
   171  	for i := range node.OrderBy {
   172  		node.OrderBy[i].Expr = replaceColumnsForExpr(node.OrderBy[i].Expr, projMap)
   173  	}
   174  }
   175  
   176  func replaceColumnsForExprList(exprList []*plan.Expr, projMap map[[2]int32]*plan.Expr) {
   177  	for i, expr := range exprList {
   178  		exprList[i] = replaceColumnsForExpr(expr, projMap)
   179  	}
   180  }
   181  
   182  func replaceColumnsForExpr(expr *plan.Expr, projMap map[[2]int32]*plan.Expr) *plan.Expr {
   183  	if expr == nil {
   184  		return nil
   185  	}
   186  
   187  	switch ne := expr.Expr.(type) {
   188  	case *plan.Expr_Col:
   189  		mapID := [2]int32{ne.Col.RelPos, ne.Col.ColPos}
   190  		if projExpr, ok := projMap[mapID]; ok {
   191  			return DeepCopyExpr(projExpr)
   192  		}
   193  
   194  	case *plan.Expr_F:
   195  		for i, arg := range ne.F.Args {
   196  			ne.F.Args[i] = replaceColumnsForExpr(arg, projMap)
   197  		}
   198  
   199  	case *plan.Expr_W:
   200  		ne.W.WindowFunc = replaceColumnsForExpr(ne.W.WindowFunc, projMap)
   201  		for i, arg := range ne.W.PartitionBy {
   202  			ne.W.PartitionBy[i] = replaceColumnsForExpr(arg, projMap)
   203  		}
   204  		for i, order := range ne.W.OrderBy {
   205  			ne.W.OrderBy[i].Expr = replaceColumnsForExpr(order.Expr, projMap)
   206  		}
   207  	}
   208  	return expr
   209  }
   210  
   211  func (builder *QueryBuilder) swapJoinChildren(nodeID int32) {
   212  	node := builder.qry.Nodes[nodeID]
   213  
   214  	for _, child := range node.Children {
   215  		builder.swapJoinChildren(child)
   216  	}
   217  
   218  	if node.BuildOnLeft {
   219  		node.Children[0], node.Children[1] = node.Children[1], node.Children[0]
   220  		if node.JoinType == plan.Node_LEFT {
   221  			node.JoinType = plan.Node_RIGHT
   222  		}
   223  	}
   224  }
   225  
   226  func (builder *QueryBuilder) remapHavingClause(expr *plan.Expr, groupTag, aggregateTag int32, groupSize int32) {
   227  	switch exprImpl := expr.Expr.(type) {
   228  	case *plan.Expr_Col:
   229  		if exprImpl.Col.RelPos == groupTag {
   230  			exprImpl.Col.Name = builder.nameByColRef[[2]int32{groupTag, exprImpl.Col.ColPos}]
   231  			exprImpl.Col.RelPos = -1
   232  		} else {
   233  			exprImpl.Col.Name = builder.nameByColRef[[2]int32{aggregateTag, exprImpl.Col.ColPos}]
   234  			exprImpl.Col.RelPos = -2
   235  			exprImpl.Col.ColPos += groupSize
   236  		}
   237  
   238  	case *plan.Expr_F:
   239  		for _, arg := range exprImpl.F.Args {
   240  			builder.remapHavingClause(arg, groupTag, aggregateTag, groupSize)
   241  		}
   242  	}
   243  }
   244  
   245  func (builder *QueryBuilder) remapWindowClause(expr *plan.Expr, windowTag int32, projectionSize int32) {
   246  	switch exprImpl := expr.Expr.(type) {
   247  	case *plan.Expr_Col:
   248  		if exprImpl.Col.RelPos == windowTag {
   249  			exprImpl.Col.Name = builder.nameByColRef[[2]int32{windowTag, exprImpl.Col.ColPos}]
   250  			exprImpl.Col.RelPos = -1
   251  			exprImpl.Col.ColPos += projectionSize
   252  		}
   253  
   254  	case *plan.Expr_F:
   255  		for _, arg := range exprImpl.F.Args {
   256  			builder.remapWindowClause(arg, windowTag, projectionSize)
   257  		}
   258  	}
   259  }
   260  
   261  // if join cond is a=b and a=c, we can remove a=c to improve join performance
   262  func (builder *QueryBuilder) removeRedundantJoinCond(nodeID int32, colMap map[[2]int32]int, colGroup []int) []int {
   263  	if builder.optimizerHints != nil && builder.optimizerHints.removeRedundantJoinCond != 0 {
   264  		return colGroup
   265  	}
   266  	node := builder.qry.Nodes[nodeID]
   267  	for i := range node.Children {
   268  		colGroup = builder.removeRedundantJoinCond(node.Children[i], colMap, colGroup)
   269  	}
   270  	if len(node.OnList) == 0 {
   271  		return colGroup
   272  	}
   273  
   274  	newOnList := make([]*plan.Expr, 0)
   275  	for _, expr := range node.OnList {
   276  		if exprf := expr.GetF(); exprf != nil {
   277  			if IsEqualFunc(exprf.Func.GetObj()) {
   278  				leftcol := exprf.Args[0].GetCol()
   279  				rightcol := exprf.Args[1].GetCol()
   280  				if leftcol != nil && rightcol != nil {
   281  					left, leftok := colMap[[2]int32{leftcol.RelPos, leftcol.ColPos}]
   282  					if !leftok {
   283  						left = len(colGroup)
   284  						colGroup = append(colGroup, left)
   285  						colMap[[2]int32{leftcol.RelPos, leftcol.ColPos}] = left
   286  					}
   287  					right, rightok := colMap[[2]int32{rightcol.RelPos, rightcol.ColPos}]
   288  					if !rightok {
   289  						right = len(colGroup)
   290  						colGroup = append(colGroup, right)
   291  						colMap[[2]int32{rightcol.RelPos, rightcol.ColPos}] = right
   292  					}
   293  					for colGroup[left] != colGroup[colGroup[left]] {
   294  						colGroup[left] = colGroup[colGroup[left]]
   295  					}
   296  					for colGroup[right] != colGroup[colGroup[right]] {
   297  						colGroup[right] = colGroup[colGroup[right]]
   298  					}
   299  					if colGroup[left] == colGroup[right] {
   300  						continue
   301  					}
   302  					newOnList = append(newOnList, expr)
   303  					colGroup[colGroup[left]] = colGroup[right]
   304  				} else {
   305  					newOnList = append(newOnList, expr)
   306  				}
   307  			} else {
   308  				newOnList = append(newOnList, expr)
   309  			}
   310  		} else {
   311  			newOnList = append(newOnList, expr)
   312  		}
   313  	}
   314  	node.OnList = newOnList
   315  
   316  	return colGroup
   317  }
   318  
   319  func (builder *QueryBuilder) removeEffectlessLeftJoins(nodeID int32, tagCnt map[int32]int) int32 {
   320  	if builder.optimizerHints != nil && builder.optimizerHints.removeEffectLessLeftJoins != 0 {
   321  		return nodeID
   322  	}
   323  	node := builder.qry.Nodes[nodeID]
   324  	if len(node.Children) == 0 {
   325  		return nodeID
   326  	}
   327  
   328  	increaseTagCntForExprList(node.ProjectList, 1, tagCnt)
   329  	increaseTagCntForExprList(node.OnList, 1, tagCnt)
   330  	increaseTagCntForExprList(node.FilterList, 1, tagCnt)
   331  	increaseTagCntForExprList(node.GroupBy, 1, tagCnt)
   332  	increaseTagCntForExprList(node.GroupingSet, 1, tagCnt)
   333  	increaseTagCntForExprList(node.AggList, 1, tagCnt)
   334  	increaseTagCntForExprList(node.WinSpecList, 1, tagCnt)
   335  	for i := range node.OrderBy {
   336  		increaseTagCnt(node.OrderBy[i].Expr, 1, tagCnt)
   337  	}
   338  	for i, childID := range node.Children {
   339  		node.Children[i] = builder.removeEffectlessLeftJoins(childID, tagCnt)
   340  	}
   341  	increaseTagCntForExprList(node.OnList, -1, tagCnt)
   342  
   343  	if node.NodeType != plan.Node_JOIN || node.JoinType != plan.Node_LEFT {
   344  		goto END
   345  	}
   346  
   347  	// if output column is in right, can not optimize this one
   348  	for _, tag := range builder.enumerateTags(node.Children[1]) {
   349  		if tagCnt[tag] > 0 {
   350  			goto END
   351  		}
   352  	}
   353  
   354  	//reuse hash on primary key logic
   355  	if !node.Stats.HashmapStats.HashOnPK {
   356  		goto END
   357  	}
   358  
   359  	nodeID = node.Children[0]
   360  
   361  END:
   362  	increaseTagCntForExprList(node.ProjectList, -1, tagCnt)
   363  	increaseTagCntForExprList(node.FilterList, -1, tagCnt)
   364  	increaseTagCntForExprList(node.GroupBy, -1, tagCnt)
   365  	increaseTagCntForExprList(node.GroupingSet, -1, tagCnt)
   366  	increaseTagCntForExprList(node.AggList, -1, tagCnt)
   367  	increaseTagCntForExprList(node.WinSpecList, -1, tagCnt)
   368  	for i := range node.OrderBy {
   369  		increaseTagCnt(node.OrderBy[i].Expr, -1, tagCnt)
   370  	}
   371  
   372  	return nodeID
   373  }
   374  
   375  func increaseTagCntForExprList(exprs []*plan.Expr, inc int, tagCnt map[int32]int) {
   376  	for _, expr := range exprs {
   377  		increaseTagCnt(expr, inc, tagCnt)
   378  	}
   379  }
   380  
   381  func increaseTagCnt(expr *plan.Expr, inc int, tagCnt map[int32]int) {
   382  	switch exprImpl := expr.Expr.(type) {
   383  	case *plan.Expr_Col:
   384  		tagCnt[exprImpl.Col.RelPos] += inc
   385  
   386  	case *plan.Expr_F:
   387  		for _, arg := range exprImpl.F.Args {
   388  			increaseTagCnt(arg, inc, tagCnt)
   389  		}
   390  	case *plan.Expr_W:
   391  		increaseTagCnt(exprImpl.W.WindowFunc, inc, tagCnt)
   392  		for _, arg := range exprImpl.W.PartitionBy {
   393  			increaseTagCnt(arg, inc, tagCnt)
   394  		}
   395  		for _, order := range exprImpl.W.OrderBy {
   396  			increaseTagCnt(order.Expr, inc, tagCnt)
   397  		}
   398  	}
   399  }
   400  
   401  func findHashOnPKTable(nodeID, tag int32, builder *QueryBuilder) *plan.TableDef {
   402  	node := builder.qry.Nodes[nodeID]
   403  	if node.NodeType == plan.Node_TABLE_SCAN {
   404  		if node.BindingTags[0] == tag {
   405  			return node.TableDef
   406  		}
   407  	} else if node.NodeType == plan.Node_JOIN && node.JoinType == plan.Node_INNER {
   408  		if node.Stats.HashmapStats.HashOnPK {
   409  			return findHashOnPKTable(node.Children[0], tag, builder)
   410  		}
   411  	}
   412  	return nil
   413  }
   414  
   415  func determineHashOnPK(nodeID int32, builder *QueryBuilder) {
   416  	if builder.optimizerHints != nil && builder.optimizerHints.determineHashOnPK != 0 {
   417  		return
   418  	}
   419  	node := builder.qry.Nodes[nodeID]
   420  	if len(node.Children) > 0 {
   421  		for _, child := range node.Children {
   422  			determineHashOnPK(child, builder)
   423  		}
   424  	}
   425  
   426  	if node.NodeType != plan.Node_JOIN {
   427  		return
   428  	}
   429  
   430  	leftTags := make(map[int32]bool)
   431  	for _, tag := range builder.enumerateTags(node.Children[0]) {
   432  		leftTags[tag] = true
   433  	}
   434  
   435  	rightTags := make(map[int32]bool)
   436  	for _, tag := range builder.enumerateTags(node.Children[1]) {
   437  		rightTags[tag] = true
   438  	}
   439  
   440  	exprs := make([]*plan.Expr, 0)
   441  	for _, expr := range node.OnList {
   442  		if equi := isEquiCond(expr, leftTags, rightTags); equi {
   443  			exprs = append(exprs, expr)
   444  		}
   445  	}
   446  
   447  	hashCols := make([]*plan.ColRef, 0)
   448  	for _, cond := range exprs {
   449  		switch condImpl := cond.Expr.(type) {
   450  		case *plan.Expr_F:
   451  			expr := condImpl.F.Args[1]
   452  			switch exprImpl := expr.Expr.(type) {
   453  			case *plan.Expr_Col:
   454  				hashCols = append(hashCols, exprImpl.Col)
   455  			}
   456  		}
   457  	}
   458  
   459  	if len(hashCols) == 0 {
   460  		return
   461  	}
   462  
   463  	tableDef := findHashOnPKTable(node.Children[1], hashCols[0].RelPos, builder)
   464  	if tableDef == nil {
   465  		return
   466  	}
   467  	hashColPos := make([]int32, len(hashCols))
   468  	for i := range hashCols {
   469  		hashColPos[i] = hashCols[i].ColPos
   470  	}
   471  	if containsAllPKs(hashColPos, tableDef) {
   472  		node.Stats.HashmapStats.HashOnPK = true
   473  	}
   474  
   475  }
   476  
   477  func getHashColsNDVRatio(nodeID int32, builder *QueryBuilder) float64 {
   478  	node := builder.qry.Nodes[nodeID]
   479  	if node.NodeType != plan.Node_JOIN {
   480  		return 1
   481  	}
   482  	result := getHashColsNDVRatio(builder.qry.Nodes[node.Children[1]].NodeId, builder)
   483  
   484  	leftTags := make(map[int32]bool)
   485  	for _, tag := range builder.enumerateTags(node.Children[0]) {
   486  		leftTags[tag] = true
   487  	}
   488  
   489  	rightTags := make(map[int32]bool)
   490  	for _, tag := range builder.enumerateTags(node.Children[1]) {
   491  		rightTags[tag] = true
   492  	}
   493  
   494  	exprs := make([]*plan.Expr, 0)
   495  	for _, expr := range node.OnList {
   496  		if equi := isEquiCond(expr, leftTags, rightTags); equi {
   497  			exprs = append(exprs, expr)
   498  		}
   499  	}
   500  
   501  	hashCols := make([]*plan.ColRef, 0)
   502  	for _, cond := range exprs {
   503  		switch condImpl := cond.Expr.(type) {
   504  		case *plan.Expr_F:
   505  			expr := condImpl.F.Args[1]
   506  			switch exprImpl := expr.Expr.(type) {
   507  			case *plan.Expr_Col:
   508  				hashCols = append(hashCols, exprImpl.Col)
   509  			}
   510  		}
   511  	}
   512  
   513  	if len(hashCols) == 0 {
   514  		return 0.0001
   515  	}
   516  
   517  	tableDef := findHashOnPKTable(node.Children[1], hashCols[0].RelPos, builder)
   518  	if tableDef == nil {
   519  		return 0.0001
   520  	}
   521  	hashColPos := make([]int32, len(hashCols))
   522  	for i := range hashCols {
   523  		hashColPos[i] = hashCols[i].ColPos
   524  	}
   525  	return builder.getColNDVRatio(hashColPos, tableDef) * result
   526  }
   527  
   528  func checkExprInTags(expr *plan.Expr, tags []int32) bool {
   529  	switch exprImpl := expr.Expr.(type) {
   530  	case *plan.Expr_F:
   531  		for i := range exprImpl.F.Args {
   532  			if !checkExprInTags(exprImpl.F.Args[i], tags) {
   533  				return false
   534  			}
   535  		}
   536  		return true
   537  
   538  	case *plan.Expr_Col:
   539  		for i := range tags {
   540  			if tags[i] == exprImpl.Col.RelPos {
   541  				return true
   542  			}
   543  		}
   544  	}
   545  	return false
   546  }
   547  
   548  func (builder *QueryBuilder) rewriteDistinctToAGG(nodeID int32) {
   549  	node := builder.qry.Nodes[nodeID]
   550  	if len(node.Children) > 0 {
   551  		for _, child := range node.Children {
   552  			builder.rewriteDistinctToAGG(child)
   553  		}
   554  	}
   555  	if node.NodeType != plan.Node_DISTINCT {
   556  		return
   557  	}
   558  	project := builder.qry.Nodes[node.Children[0]]
   559  	if project.NodeType != plan.Node_PROJECT {
   560  		return
   561  	}
   562  	if builder.qry.Nodes[project.Children[0]].NodeType == plan.Node_VALUE_SCAN {
   563  		return
   564  	}
   565  
   566  	node.NodeType = plan.Node_AGG
   567  	node.GroupBy = project.ProjectList
   568  	node.BindingTags = project.BindingTags
   569  	node.BindingTags = append(node.BindingTags, builder.genNewTag())
   570  	node.Children[0] = project.Children[0]
   571  }
   572  
   573  // reuse removeSimpleProjections to delete this plan node
   574  func (builder *QueryBuilder) rewriteEffectlessAggToProject(nodeID int32) {
   575  	node := builder.qry.Nodes[nodeID]
   576  	if len(node.Children) > 0 {
   577  		for _, child := range node.Children {
   578  			builder.rewriteEffectlessAggToProject(child)
   579  		}
   580  	}
   581  	if node.NodeType != plan.Node_AGG {
   582  		return
   583  	}
   584  	if node.AggList != nil || node.ProjectList != nil || node.FilterList != nil {
   585  		return
   586  	}
   587  	scan := builder.qry.Nodes[node.Children[0]]
   588  	if scan.NodeType != plan.Node_TABLE_SCAN {
   589  		return
   590  	}
   591  	groupCol := make([]int32, 0)
   592  	for _, expr := range node.GroupBy {
   593  		if col := expr.GetCol(); col != nil {
   594  			groupCol = append(groupCol, col.ColPos)
   595  		}
   596  	}
   597  	if !containsAllPKs(groupCol, scan.TableDef) {
   598  		return
   599  	}
   600  	node.NodeType = plan.Node_PROJECT
   601  	node.BindingTags = node.BindingTags[:1]
   602  	node.ProjectList = node.GroupBy
   603  	node.GroupBy = nil
   604  }
   605  
   606  func makeBetweenExprFromDateFormat(equalFunc *plan.Function, dateformatFunc *plan.Function, intervalStr string, builder *QueryBuilder) *plan.Expr {
   607  	dateExpr := DeepCopyExpr(equalFunc.Args[1])
   608  	if intervalStr == "year" {
   609  		sval, _ := dateExpr.GetLit().GetValue().(*plan.Literal_Sval)
   610  		sval.Sval = sval.Sval + "0101"
   611  	}
   612  	begin, err := forceCastExpr(builder.GetContext(), dateExpr, dateformatFunc.Args[0].Typ)
   613  	if err != nil {
   614  		return nil
   615  	}
   616  	begin, err = ConstantFold(batch.EmptyForConstFoldBatch, begin, builder.compCtx.GetProcess(), false)
   617  	if err != nil {
   618  		return nil
   619  	}
   620  	interval := MakeIntervalExpr(1, intervalStr)
   621  	end, err := bindFuncExprAndConstFold(builder.GetContext(), builder.compCtx.GetProcess(), "+", []*Expr{DeepCopyExpr(begin), interval})
   622  	if err != nil {
   623  		return nil
   624  	}
   625  	interval = MakeIntervalExpr(1, "microsecond")
   626  	end, err = bindFuncExprAndConstFold(builder.GetContext(), builder.compCtx.GetProcess(), "-", []*Expr{DeepCopyExpr(end), interval})
   627  	if err != nil {
   628  		return nil
   629  	}
   630  	args := []*Expr{dateformatFunc.Args[0], begin, end}
   631  	newFilter, err := bindFuncExprAndConstFold(builder.GetContext(), builder.compCtx.GetProcess(), "between", args)
   632  	if err != nil {
   633  		return nil
   634  	}
   635  	return newFilter
   636  }
   637  
   638  func (builder *QueryBuilder) optimizeDateFormatExpr(nodeID int32) {
   639  	if builder.optimizerHints != nil && builder.optimizerHints.optimizeDateFormatExpr != 0 {
   640  		return
   641  	}
   642  	// for date_format(col,'%Y-%m-%d')= '2024-01-19', change this to col between [2024-01-19 00:00:00,2024-01-19 23:59:59]
   643  	node := builder.qry.Nodes[nodeID]
   644  	for _, childID := range node.Children {
   645  		builder.optimizeDateFormatExpr(childID)
   646  	}
   647  	if node.NodeType != plan.Node_TABLE_SCAN || len(node.FilterList) == 0 {
   648  		return
   649  	}
   650  	for i := range node.FilterList {
   651  		expr := node.FilterList[i]
   652  		equalFunc := expr.GetF()
   653  		if equalFunc != nil && equalFunc.Func.ObjName == "=" {
   654  			dateformatFunc := equalFunc.Args[0].GetF()
   655  			if dateformatFunc == nil || dateformatFunc.Func.ObjName != "date_format" {
   656  				continue
   657  			}
   658  			col := dateformatFunc.Args[0].GetCol()
   659  			if col == nil {
   660  				continue
   661  			}
   662  			if dateformatFunc.Args[1].GetLit() == nil {
   663  				continue
   664  			}
   665  			str := dateformatFunc.Args[1].GetLit().GetSval()
   666  			if len(str) == 0 {
   667  				continue
   668  			}
   669  			if equalFunc.Args[1].GetLit() == nil {
   670  				continue
   671  			}
   672  			dateSval := equalFunc.Args[1].GetLit().GetSval()
   673  			var newFilter *plan.Expr
   674  			switch str {
   675  			case "%Y-%m-%d":
   676  				if len(dateSval) != 10 || dateSval[4] != '-' || dateSval[7] != '-' {
   677  					continue
   678  				}
   679  				newFilter = makeBetweenExprFromDateFormat(equalFunc, dateformatFunc, "day", builder)
   680  			case "%Y%m%d":
   681  				if len(dateSval) != 8 {
   682  					continue
   683  				}
   684  				newFilter = makeBetweenExprFromDateFormat(equalFunc, dateformatFunc, "day", builder)
   685  			case "%Y":
   686  				if len(dateSval) != 4 {
   687  					continue
   688  				}
   689  				newFilter = makeBetweenExprFromDateFormat(equalFunc, dateformatFunc, "year", builder)
   690  			}
   691  			if newFilter != nil {
   692  				node.FilterList[i] = newFilter
   693  			}
   694  		}
   695  	}
   696  }
   697  
   698  func (builder *QueryBuilder) optimizeLikeExpr(nodeID int32) {
   699  	if builder.optimizerHints != nil && builder.optimizerHints.optimizeLikeExpr != 0 {
   700  		return
   701  	}
   702  	// for a like "abc%", change it to prefix_equal(a,"abc")
   703  	// for a like "abc%def", add an extra filter prefix_equal(a,"abc")
   704  	node := builder.qry.Nodes[nodeID]
   705  
   706  	for _, childID := range node.Children {
   707  		builder.optimizeLikeExpr(childID)
   708  	}
   709  	if node.NodeType != plan.Node_TABLE_SCAN || len(node.FilterList) == 0 {
   710  		return
   711  	}
   712  	var newFilters []*plan.Expr
   713  	for i := range node.FilterList {
   714  		expr := node.FilterList[i]
   715  		fun := expr.GetF()
   716  		if fun != nil && fun.Func.ObjName == "like" {
   717  			col := fun.Args[0].GetCol()
   718  			if col == nil {
   719  				continue
   720  			}
   721  			if fun.Args[1].GetLit() == nil {
   722  				continue
   723  			}
   724  			str := fun.Args[1].GetLit().GetSval()
   725  			if len(str) == 0 {
   726  				continue
   727  			}
   728  			index1 := strings.IndexByte(str, '_')
   729  			if index1 > 0 && str[index1-1] == '\\' {
   730  				index1--
   731  			}
   732  			index2 := strings.IndexByte(str, '%')
   733  			if index2 > 0 && str[index2-1] == '\\' {
   734  				index2--
   735  			}
   736  			if index1 == -1 && index2 == -1 {
   737  				// it's col like string without wildcard, can change to equal
   738  				fun.Func.ObjName = function.EqualFunctionName
   739  				fun.Func.Obj = function.EqualFunctionEncodedID
   740  				continue
   741  			}
   742  
   743  			indexOfWildCard := index1
   744  			if index1 == -1 {
   745  				indexOfWildCard = index2
   746  			}
   747  			if index2 != -1 && index2 < index1 {
   748  				indexOfWildCard = index2
   749  			}
   750  			if indexOfWildCard <= 0 {
   751  				continue
   752  			}
   753  			newStr := str[:indexOfWildCard]
   754  
   755  			newFilter := node.FilterList[i]
   756  			// if no _ and % in the last, we can replace the origin filter
   757  			replaceOrigin := (index1 == -1) && (index2 == len(str)-1)
   758  			if !replaceOrigin {
   759  				newFilter = DeepCopyExpr(newFilter)
   760  				newFilters = append(newFilters, newFilter)
   761  			}
   762  			newFunc := newFilter.GetF()
   763  			newFunc.Func.ObjName = function.PrefixEqualFunctionName
   764  			newFunc.Func.Obj = function.PrefixEqualFunctionEncodedID
   765  			newFunc.Args[1].GetLit().Value.(*plan.Literal_Sval).Sval = newStr
   766  			if replaceOrigin {
   767  				node.BlockFilterList = append(node.BlockFilterList, DeepCopyExpr(newFilter))
   768  			}
   769  		}
   770  	}
   771  	if len(newFilters) > 0 {
   772  		node.FilterList = append(node.FilterList, newFilters...)
   773  		node.BlockFilterList = append(node.BlockFilterList, DeepCopyExprList(newFilters)...)
   774  	}
   775  }
   776  
   777  func (builder *QueryBuilder) forceJoinOnOneCN(nodeID int32, force bool) {
   778  	node := builder.qry.Nodes[nodeID]
   779  	if node.NodeType == plan.Node_TABLE_SCAN {
   780  		node.Stats.ForceOneCN = force
   781  	} else if node.NodeType == plan.Node_JOIN {
   782  		if len(node.RuntimeFilterBuildList) > 0 {
   783  			switch node.JoinType {
   784  			case plan.Node_RIGHT:
   785  				if !node.Stats.HashmapStats.Shuffle {
   786  					force = true
   787  				}
   788  			case plan.Node_SEMI, plan.Node_ANTI:
   789  				if node.BuildOnLeft && !node.Stats.HashmapStats.Shuffle {
   790  					force = true
   791  				}
   792  			case plan.Node_INDEX:
   793  				force = true
   794  			}
   795  		}
   796  	}
   797  	for _, childID := range node.Children {
   798  		builder.forceJoinOnOneCN(childID, force)
   799  	}
   800  }
   801  
   802  func handleOptimizerHints(str string, builder *QueryBuilder) {
   803  	strs := strings.Split(str, "=")
   804  	if len(strs) != 2 {
   805  		return
   806  	}
   807  	key := strs[0]
   808  	value, err := strconv.Atoi(strs[1])
   809  	if err != nil {
   810  		return
   811  	}
   812  	if builder.optimizerHints == nil {
   813  		builder.optimizerHints = &OptimizerHints{}
   814  	}
   815  	switch key {
   816  	case "pushDownLimitToScan":
   817  		builder.optimizerHints.pushDownLimitToScan = value
   818  	case "pushDownTopThroughLeftJoin":
   819  		builder.optimizerHints.pushDownTopThroughLeftJoin = value
   820  	case "pushDownSemiAntiJoins":
   821  		builder.optimizerHints.pushDownSemiAntiJoins = value
   822  	case "aggPushDown":
   823  		builder.optimizerHints.aggPushDown = value
   824  	case "aggPullUp":
   825  		builder.optimizerHints.aggPullUp = value
   826  	case "removeEffectLessLeftJoins":
   827  		builder.optimizerHints.removeEffectLessLeftJoins = value
   828  	case "removeRedundantJoinCond":
   829  		builder.optimizerHints.removeRedundantJoinCond = value
   830  	case "optimizeLikeExpr":
   831  		builder.optimizerHints.optimizeLikeExpr = value
   832  	case "optimizeDateFormatExpr":
   833  		builder.optimizerHints.optimizeDateFormatExpr = value
   834  	case "determineHashOnPK":
   835  		builder.optimizerHints.determineHashOnPK = value
   836  	case "sendMessageFromTopToScan":
   837  		builder.optimizerHints.sendMessageFromTopToScan = value
   838  	case "determineShuffle":
   839  		builder.optimizerHints.determineShuffle = value
   840  	case "blockFilter":
   841  		builder.optimizerHints.blockFilter = value
   842  	case "applyIndices":
   843  		builder.optimizerHints.applyIndices = value
   844  	case "runtimeFilter":
   845  		builder.optimizerHints.runtimeFilter = value
   846  	case "joinOrdering":
   847  		builder.optimizerHints.joinOrdering = value
   848  	}
   849  }
   850  
   851  func (builder *QueryBuilder) parseOptimizeHints() {
   852  	v, ok := runtime.ProcessLevelRuntime().GetGlobalVariables("optimizer_hints")
   853  	if !ok {
   854  		return
   855  	}
   856  	str := v.(string)
   857  	if len(str) == 0 {
   858  		return
   859  	}
   860  	kvs := strings.Split(str, ",")
   861  	for i := range kvs {
   862  		handleOptimizerHints(kvs[i], builder)
   863  	}
   864  }