github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/pushdown.go (about)

     1  // Copyright 2024 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import "github.com/matrixorigin/matrixone/pkg/pb/plan"
    18  
    19  func (builder *QueryBuilder) pushdownFilters(nodeID int32, filters []*plan.Expr, separateNonEquiConds bool) (int32, []*plan.Expr) {
    20  	node := builder.qry.Nodes[nodeID]
    21  
    22  	var canPushdown, cantPushdown []*plan.Expr
    23  
    24  	if node.Limit != nil {
    25  		// can not push down over limit
    26  		cantPushdown = filters
    27  		filters = nil
    28  	}
    29  
    30  	switch node.NodeType {
    31  	case plan.Node_AGG:
    32  		groupTag := node.BindingTags[0]
    33  		aggregateTag := node.BindingTags[1]
    34  
    35  		for _, filter := range filters {
    36  			if !containsTag(filter, aggregateTag) {
    37  				canPushdown = append(canPushdown, replaceColRefs(filter, groupTag, node.GroupBy))
    38  			} else {
    39  				node.FilterList = append(node.FilterList, filter)
    40  			}
    41  		}
    42  
    43  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
    44  
    45  		if len(cantPushdownChild) > 0 {
    46  			childID = builder.appendNode(&plan.Node{
    47  				NodeType:   plan.Node_FILTER,
    48  				Children:   []int32{node.Children[0]},
    49  				FilterList: cantPushdownChild,
    50  			}, nil)
    51  		}
    52  
    53  		node.Children[0] = childID
    54  
    55  	case plan.Node_SAMPLE:
    56  		groupTag := node.BindingTags[0]
    57  		sampleTag := node.BindingTags[1]
    58  
    59  		for _, filter := range filters {
    60  			if !containsTag(filter, sampleTag) {
    61  				canPushdown = append(canPushdown, replaceColRefs(filter, groupTag, node.GroupBy))
    62  			} else {
    63  				node.FilterList = append(node.FilterList, filter)
    64  			}
    65  		}
    66  
    67  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
    68  
    69  		if len(cantPushdownChild) > 0 {
    70  			childID = builder.appendNode(&plan.Node{
    71  				NodeType:   plan.Node_FILTER,
    72  				Children:   []int32{node.Children[0]},
    73  				FilterList: cantPushdownChild,
    74  			}, nil)
    75  		}
    76  
    77  		node.Children[0] = childID
    78  
    79  	case plan.Node_WINDOW:
    80  		windowTag := node.BindingTags[0]
    81  
    82  		for _, filter := range filters {
    83  			if !containsTag(filter, windowTag) {
    84  				canPushdown = append(canPushdown, replaceColRefs(filter, windowTag, node.WinSpecList))
    85  			} else {
    86  				node.FilterList = append(node.FilterList, filter)
    87  			}
    88  		}
    89  
    90  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
    91  
    92  		if len(cantPushdownChild) > 0 {
    93  			childID = builder.appendNode(&plan.Node{
    94  				NodeType:   plan.Node_FILTER,
    95  				Children:   []int32{node.Children[0]},
    96  				FilterList: cantPushdownChild,
    97  			}, nil)
    98  		}
    99  
   100  		node.Children[0] = childID
   101  
   102  	case plan.Node_TIME_WINDOW:
   103  		windowTag := node.BindingTags[0]
   104  
   105  		for _, filter := range filters {
   106  			if !containsTag(filter, windowTag) {
   107  				canPushdown = append(canPushdown, replaceColRefs(filter, windowTag, node.WinSpecList))
   108  			} else {
   109  				node.FilterList = append(node.FilterList, filter)
   110  			}
   111  		}
   112  
   113  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
   114  
   115  		if len(cantPushdownChild) > 0 {
   116  			childID = builder.appendNode(&plan.Node{
   117  				NodeType:   plan.Node_FILTER,
   118  				Children:   []int32{node.Children[0]},
   119  				FilterList: cantPushdownChild,
   120  			}, nil)
   121  		}
   122  
   123  		node.Children[0] = childID
   124  
   125  	case plan.Node_FILTER:
   126  		canPushdown = filters
   127  		for _, filter := range node.FilterList {
   128  			canPushdown = append(canPushdown, splitPlanConjunction(applyDistributivity(builder.GetContext(), filter))...)
   129  		}
   130  
   131  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
   132  
   133  		if len(cantPushdownChild) > 0 {
   134  			node.Children[0] = childID
   135  			node.FilterList = cantPushdownChild
   136  		} else {
   137  			nodeID = childID
   138  		}
   139  
   140  	case plan.Node_JOIN:
   141  		leftTags := make(map[int32]bool)
   142  		for _, tag := range builder.enumerateTags(node.Children[0]) {
   143  			leftTags[tag] = true
   144  		}
   145  
   146  		rightTags := make(map[int32]bool)
   147  		for _, tag := range builder.enumerateTags(node.Children[1]) {
   148  			rightTags[tag] = true
   149  		}
   150  
   151  		var markTag int32
   152  		if node.JoinType == plan.Node_MARK {
   153  			markTag = node.BindingTags[0]
   154  		}
   155  
   156  		node.OnList = splitPlanConjunctions(node.OnList)
   157  
   158  		if node.JoinType == plan.Node_INNER {
   159  			for _, cond := range node.OnList {
   160  				filters = append(filters, splitPlanConjunction(applyDistributivity(builder.GetContext(), cond))...)
   161  			}
   162  
   163  			node.OnList = nil
   164  		}
   165  
   166  		var leftPushdown, rightPushdown []*plan.Expr
   167  		var turnInner bool
   168  
   169  		joinSides := make([]int8, len(filters))
   170  
   171  		for i, filter := range filters {
   172  			canTurnInner := true
   173  
   174  			joinSides[i] = getJoinSide(filter, leftTags, rightTags, markTag)
   175  			if f := filter.GetF(); f != nil {
   176  				for _, arg := range f.Args {
   177  					if getJoinSide(arg, leftTags, rightTags, markTag) == JoinSideBoth {
   178  						canTurnInner = false
   179  						break
   180  					}
   181  				}
   182  			}
   183  
   184  			if canTurnInner && node.JoinType == plan.Node_LEFT && joinSides[i]&JoinSideRight != 0 && rejectsNull(filter, builder.compCtx.GetProcess()) {
   185  				for _, cond := range node.OnList {
   186  					filters = append(filters, splitPlanConjunction(applyDistributivity(builder.GetContext(), cond))...)
   187  				}
   188  
   189  				node.JoinType = plan.Node_INNER
   190  				node.OnList = nil
   191  				turnInner = true
   192  
   193  				break
   194  			}
   195  
   196  			// TODO: FULL OUTER join should be handled here. However we don't have FULL OUTER join now.
   197  		}
   198  
   199  		if turnInner {
   200  			joinSides = make([]int8, len(filters))
   201  
   202  			for i, filter := range filters {
   203  				joinSides[i] = getJoinSide(filter, leftTags, rightTags, markTag)
   204  			}
   205  		} else if node.JoinType == plan.Node_LEFT {
   206  			var newOnList []*plan.Expr
   207  			for _, cond := range node.OnList {
   208  				conj := splitPlanConjunction(applyDistributivity(builder.GetContext(), cond))
   209  				for _, conjElem := range conj {
   210  					side := getJoinSide(conjElem, leftTags, rightTags, markTag)
   211  					if side&JoinSideLeft == 0 {
   212  						rightPushdown = append(rightPushdown, conjElem)
   213  					} else {
   214  						newOnList = append(newOnList, conjElem)
   215  					}
   216  				}
   217  			}
   218  
   219  			node.OnList = newOnList
   220  		}
   221  
   222  		if !separateNonEquiConds {
   223  			var extraFilters []*plan.Expr
   224  			for i, filter := range filters {
   225  				if joinSides[i] != JoinSideBoth {
   226  					continue
   227  				}
   228  				switch exprImpl := filter.Expr.(type) {
   229  				case *plan.Expr_F:
   230  					if exprImpl.F.Func.ObjName == "or" {
   231  						keys := checkDNF(filter)
   232  						for _, key := range keys {
   233  							extraFilter := walkThroughDNF(builder.GetContext(), filter, key)
   234  							if extraFilter != nil {
   235  								extraFilters = append(extraFilters, DeepCopyExpr(extraFilter))
   236  								joinSides = append(joinSides, getJoinSide(extraFilter, leftTags, rightTags, markTag))
   237  							}
   238  						}
   239  					}
   240  				}
   241  			}
   242  			filters = append(filters, extraFilters...)
   243  		}
   244  
   245  		for i, filter := range filters {
   246  			switch joinSides[i] {
   247  			case JoinSideNone:
   248  				if filter.GetLit().GetBval() {
   249  					break
   250  				}
   251  
   252  				switch node.JoinType {
   253  				case plan.Node_INNER:
   254  					leftPushdown = append(leftPushdown, DeepCopyExpr(filter))
   255  					rightPushdown = append(rightPushdown, filter)
   256  
   257  				case plan.Node_LEFT, plan.Node_SEMI, plan.Node_ANTI, plan.Node_SINGLE, plan.Node_MARK:
   258  					leftPushdown = append(leftPushdown, filter)
   259  
   260  				default:
   261  					cantPushdown = append(cantPushdown, filter)
   262  				}
   263  
   264  			case JoinSideLeft:
   265  				if node.JoinType != plan.Node_OUTER {
   266  					leftPushdown = append(leftPushdown, filter)
   267  				} else {
   268  					cantPushdown = append(cantPushdown, filter)
   269  				}
   270  
   271  			case JoinSideRight:
   272  				if node.JoinType == plan.Node_INNER {
   273  					rightPushdown = append(rightPushdown, filter)
   274  				} else {
   275  					cantPushdown = append(cantPushdown, filter)
   276  				}
   277  
   278  			case JoinSideBoth:
   279  				if node.JoinType == plan.Node_INNER {
   280  					if separateNonEquiConds {
   281  						if f := filter.GetF(); f != nil {
   282  							if f.Func.ObjName == "=" {
   283  								if getJoinSide(f.Args[0], leftTags, rightTags, markTag) != JoinSideBoth {
   284  									if getJoinSide(f.Args[1], leftTags, rightTags, markTag) != JoinSideBoth {
   285  										node.OnList = append(node.OnList, filter)
   286  										break
   287  									}
   288  								}
   289  							}
   290  						}
   291  					} else {
   292  						node.OnList = append(node.OnList, filter)
   293  						break
   294  					}
   295  				}
   296  
   297  				cantPushdown = append(cantPushdown, filter)
   298  
   299  			case JoinSideMark:
   300  				if tryMark := filter.GetCol(); tryMark != nil {
   301  					if tryMark.RelPos == node.BindingTags[0] {
   302  						node.JoinType = plan.Node_SEMI
   303  						node.BindingTags = nil
   304  						break
   305  					}
   306  				} else if fExpr := filter.GetF(); fExpr != nil && filter.Typ.NotNullable && fExpr.Func.ObjName == "not" {
   307  					arg := fExpr.Args[0]
   308  					if tryMark := arg.GetCol(); tryMark != nil {
   309  						if tryMark.RelPos == node.BindingTags[0] {
   310  							node.JoinType = plan.Node_ANTI
   311  							node.BindingTags = nil
   312  							break
   313  						}
   314  					}
   315  				}
   316  
   317  				cantPushdown = append(cantPushdown, filter)
   318  
   319  			default:
   320  				cantPushdown = append(cantPushdown, filter)
   321  			}
   322  		}
   323  
   324  		//when onlist is empty, it will be a cross join, performance will be very poor
   325  		//in this situation, we put the non equal conds in the onlist and go loop join
   326  		//todo: when equal conds and non equal conds both exists, put them in the on list and go hash equal join
   327  		if node.JoinType == plan.Node_INNER && len(node.OnList) == 0 {
   328  			// for tpch q22, do not change the plan for now. will fix in the future
   329  			leftStats := builder.qry.Nodes[node.Children[0]].Stats
   330  			rightStats := builder.qry.Nodes[node.Children[1]].Stats
   331  			if leftStats.Outcnt != 1 && rightStats.Outcnt != 1 {
   332  				node.OnList = append(node.OnList, cantPushdown...)
   333  				cantPushdown = nil
   334  			}
   335  		}
   336  
   337  		switch node.JoinType {
   338  		case plan.Node_INNER, plan.Node_SEMI:
   339  			//inner and semi join can deduce new predicate from both side
   340  			builder.pushdownFilters(node.Children[0], deduceNewFilterList(rightPushdown, node.OnList), separateNonEquiConds)
   341  			builder.pushdownFilters(node.Children[1], deduceNewFilterList(leftPushdown, node.OnList), separateNonEquiConds)
   342  		case plan.Node_RIGHT:
   343  			//right join can deduce new predicate only from right side to left
   344  			builder.pushdownFilters(node.Children[0], deduceNewFilterList(rightPushdown, node.OnList), separateNonEquiConds)
   345  		case plan.Node_LEFT:
   346  			//left join can deduce new predicate only from left side to right
   347  			builder.pushdownFilters(node.Children[1], deduceNewFilterList(leftPushdown, node.OnList), separateNonEquiConds)
   348  		}
   349  
   350  		if builder.qry.Nodes[node.Children[1]].NodeType == plan.Node_FUNCTION_SCAN {
   351  
   352  			for _, filter := range filters {
   353  				down := false
   354  				if builder.checkExprCanPushdown(filter, builder.qry.Nodes[node.Children[0]]) {
   355  					leftPushdown = append(leftPushdown, DeepCopyExpr(filter))
   356  					down = true
   357  				}
   358  				if builder.checkExprCanPushdown(filter, builder.qry.Nodes[node.Children[1]]) {
   359  					rightPushdown = append(rightPushdown, DeepCopyExpr(filter))
   360  					down = true
   361  				}
   362  				if !down {
   363  					cantPushdown = append(cantPushdown, DeepCopyExpr(filter))
   364  				}
   365  			}
   366  		}
   367  
   368  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], leftPushdown, separateNonEquiConds)
   369  
   370  		if len(cantPushdownChild) > 0 {
   371  			childID = builder.appendNode(&plan.Node{
   372  				NodeType:   plan.Node_FILTER,
   373  				Children:   []int32{node.Children[0]},
   374  				FilterList: cantPushdownChild,
   375  			}, nil)
   376  		}
   377  
   378  		node.Children[0] = childID
   379  
   380  		childID, cantPushdownChild = builder.pushdownFilters(node.Children[1], rightPushdown, separateNonEquiConds)
   381  
   382  		if len(cantPushdownChild) > 0 {
   383  			childID = builder.appendNode(&plan.Node{
   384  				NodeType:   plan.Node_FILTER,
   385  				Children:   []int32{node.Children[1]},
   386  				FilterList: cantPushdownChild,
   387  			}, nil)
   388  		}
   389  
   390  		node.Children[1] = childID
   391  
   392  	case plan.Node_UNION, plan.Node_UNION_ALL, plan.Node_MINUS, plan.Node_MINUS_ALL, plan.Node_INTERSECT, plan.Node_INTERSECT_ALL:
   393  		leftChild := builder.qry.Nodes[node.Children[0]]
   394  		rightChild := builder.qry.Nodes[node.Children[1]]
   395  		var canPushDownRight []*plan.Expr
   396  
   397  		for _, filter := range filters {
   398  			canPushdown = append(canPushdown, replaceColRefsForSet(DeepCopyExpr(filter), leftChild.ProjectList))
   399  			canPushDownRight = append(canPushDownRight, replaceColRefsForSet(filter, rightChild.ProjectList))
   400  		}
   401  
   402  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
   403  		if len(cantPushdownChild) > 0 {
   404  			childID = builder.appendNode(&plan.Node{
   405  				NodeType:   plan.Node_FILTER,
   406  				Children:   []int32{node.Children[0]},
   407  				FilterList: cantPushdownChild,
   408  			}, nil)
   409  		}
   410  		node.Children[0] = childID
   411  
   412  		childID, cantPushdownChild = builder.pushdownFilters(node.Children[1], canPushDownRight, separateNonEquiConds)
   413  		if len(cantPushdownChild) > 0 {
   414  			childID = builder.appendNode(&plan.Node{
   415  				NodeType:   plan.Node_FILTER,
   416  				Children:   []int32{node.Children[1]},
   417  				FilterList: cantPushdownChild,
   418  			}, nil)
   419  		}
   420  		node.Children[1] = childID
   421  
   422  	case plan.Node_PROJECT:
   423  		child := builder.qry.Nodes[node.Children[0]]
   424  		if (child.NodeType == plan.Node_VALUE_SCAN || child.NodeType == plan.Node_EXTERNAL_SCAN) && child.RowsetData == nil {
   425  			cantPushdown = filters
   426  			break
   427  		}
   428  
   429  		projectTag := node.BindingTags[0]
   430  
   431  		for _, filter := range filters {
   432  			canPushdown = append(canPushdown, replaceColRefs(filter, projectTag, node.ProjectList))
   433  		}
   434  
   435  		childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], canPushdown, separateNonEquiConds)
   436  
   437  		if len(cantPushdownChild) > 0 {
   438  			childID = builder.appendNode(&plan.Node{
   439  				NodeType:   plan.Node_FILTER,
   440  				Children:   []int32{node.Children[0]},
   441  				FilterList: cantPushdownChild,
   442  			}, nil)
   443  		}
   444  
   445  		node.Children[0] = childID
   446  
   447  	case plan.Node_TABLE_SCAN, plan.Node_EXTERNAL_SCAN:
   448  		for _, filter := range filters {
   449  			if onlyContainsTag(filter, node.BindingTags[0]) {
   450  				node.FilterList = append(node.FilterList, filter)
   451  			} else {
   452  				cantPushdown = append(cantPushdown, filter)
   453  			}
   454  		}
   455  	case plan.Node_FUNCTION_SCAN:
   456  		downFilters := make([]*plan.Expr, 0)
   457  		selfFilters := make([]*plan.Expr, 0)
   458  		for _, filter := range filters {
   459  			if onlyContainsTag(filter, node.BindingTags[0]) {
   460  				selfFilters = append(selfFilters, DeepCopyExpr(filter))
   461  			} else {
   462  				downFilters = append(downFilters, DeepCopyExpr(filter))
   463  			}
   464  		}
   465  		node.FilterList = append(node.FilterList, selfFilters...)
   466  		childId := node.Children[0]
   467  		childId, _ = builder.pushdownFilters(childId, downFilters, separateNonEquiConds)
   468  		node.Children[0] = childId
   469  	default:
   470  		if len(node.Children) > 0 {
   471  			childID, cantPushdownChild := builder.pushdownFilters(node.Children[0], filters, separateNonEquiConds)
   472  
   473  			if len(cantPushdownChild) > 0 {
   474  				childID = builder.appendNode(&plan.Node{
   475  					NodeType:   plan.Node_FILTER,
   476  					Children:   []int32{node.Children[0]},
   477  					FilterList: cantPushdownChild,
   478  				}, nil)
   479  			}
   480  
   481  			node.Children[0] = childID
   482  		} else {
   483  			cantPushdown = filters
   484  		}
   485  	}
   486  
   487  	return nodeID, cantPushdown
   488  }
   489  
   490  // order by limit can be pushed down to left child of left join
   491  func (builder *QueryBuilder) pushdownTopThroughLeftJoin(nodeID int32) {
   492  	if builder.optimizerHints != nil && builder.optimizerHints.pushDownTopThroughLeftJoin != 0 {
   493  		return
   494  	}
   495  	node := builder.qry.Nodes[nodeID]
   496  	var joinnode, nodePushDown *plan.Node
   497  	var tags []int32
   498  	var newNodeID int32
   499  
   500  	if node.NodeType != plan.Node_SORT || node.Limit == nil {
   501  		goto END
   502  	}
   503  	joinnode = builder.qry.Nodes[node.Children[0]]
   504  	if joinnode.NodeType != plan.Node_JOIN {
   505  		goto END
   506  	}
   507  
   508  	//before join order, only left join
   509  	if joinnode.JoinType != plan.Node_LEFT {
   510  		goto END
   511  	}
   512  
   513  	// check orderby column
   514  	tags = builder.enumerateTags(builder.qry.Nodes[joinnode.Children[0]].NodeId)
   515  	for i := range node.OrderBy {
   516  		if !checkExprInTags(node.OrderBy[i].Expr, tags) {
   517  			goto END
   518  		}
   519  	}
   520  
   521  	nodePushDown = DeepCopyNode(node)
   522  
   523  	if nodePushDown.Offset != nil {
   524  		newExpr, err := bindFuncExprAndConstFold(builder.GetContext(), builder.compCtx.GetProcess(), "+", []*Expr{nodePushDown.Limit, nodePushDown.Offset})
   525  		if err != nil {
   526  			goto END
   527  		}
   528  		nodePushDown.Offset = nil
   529  		nodePushDown.Limit = newExpr
   530  	}
   531  	newNodeID = builder.appendNode(nodePushDown, nil)
   532  	nodePushDown.Children[0] = joinnode.Children[0]
   533  	joinnode.Children[0] = newNodeID
   534  
   535  END:
   536  	if len(node.Children) > 0 {
   537  		for _, child := range node.Children {
   538  			builder.pushdownTopThroughLeftJoin(child)
   539  		}
   540  	}
   541  }
   542  
   543  func (builder *QueryBuilder) pushdownLimitToTableScan(nodeID int32) {
   544  	if builder.optimizerHints != nil && builder.optimizerHints.pushDownLimitToScan != 0 {
   545  		return
   546  	}
   547  	node := builder.qry.Nodes[nodeID]
   548  	for _, childID := range node.Children {
   549  		builder.pushdownLimitToTableScan(childID)
   550  	}
   551  	if node.NodeType == plan.Node_PROJECT && len(node.Children) > 0 {
   552  		child := builder.qry.Nodes[node.Children[0]]
   553  		if child.NodeType == plan.Node_TABLE_SCAN {
   554  			child.Limit, child.Offset = node.Limit, node.Offset
   555  			node.Limit, node.Offset = nil, nil
   556  		}
   557  	}
   558  }