vitess.io/vitess@v0.16.2/go/vt/sqlparser/predicate_rewriting.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sqlparser
    18  
    19  import (
    20  	"vitess.io/vitess/go/vt/log"
    21  )
    22  
    23  // RewritePredicate walks the input AST and rewrites any boolean logic into a simpler form
    24  // This simpler form is CNF plus logic for extracting predicates from OR, plus logic for turning ORs into IN
    25  // Note: In order to re-plan, we need to empty the accumulated metadata in the AST,
    26  // so ColName.Metadata will be nil:ed out as part of this rewrite
    27  func RewritePredicate(ast SQLNode) SQLNode {
    28  	for {
    29  		printExpr(ast)
    30  		exprChanged := false
    31  		stopOnChange := func(SQLNode, SQLNode) bool {
    32  			return !exprChanged
    33  		}
    34  		ast = SafeRewrite(ast, stopOnChange, func(cursor *Cursor) bool {
    35  			e, isExpr := cursor.node.(Expr)
    36  			if !isExpr {
    37  				return true
    38  			}
    39  
    40  			rewritten, state := simplifyExpression(e)
    41  			if ch, isChange := state.(changed); isChange {
    42  				printRule(ch.rule, ch.exprMatched)
    43  				exprChanged = true
    44  				cursor.Replace(rewritten)
    45  			}
    46  
    47  			if col, isCol := cursor.node.(*ColName); isCol {
    48  				col.Metadata = nil
    49  			}
    50  			return !exprChanged
    51  		})
    52  
    53  		if !exprChanged {
    54  			return ast
    55  		}
    56  	}
    57  }
    58  
    59  func simplifyExpression(expr Expr) (Expr, rewriteState) {
    60  	switch expr := expr.(type) {
    61  	case *NotExpr:
    62  		return simplifyNot(expr)
    63  	case *OrExpr:
    64  		return simplifyOr(expr)
    65  	case *XorExpr:
    66  		return simplifyXor(expr)
    67  	case *AndExpr:
    68  		return simplifyAnd(expr)
    69  	}
    70  	return expr, noChange{}
    71  }
    72  
    73  func simplifyNot(expr *NotExpr) (Expr, rewriteState) {
    74  	switch child := expr.Expr.(type) {
    75  	case *NotExpr:
    76  		return child.Expr,
    77  			newChange("NOT NOT A => A", f(expr))
    78  	case *OrExpr:
    79  		return &AndExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}},
    80  			newChange("NOT (A OR B) => NOT A AND NOT B", f(expr))
    81  	case *AndExpr:
    82  		return &OrExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}},
    83  			newChange("NOT (A AND B) => NOT A OR NOT B", f(expr))
    84  	}
    85  	return expr, noChange{}
    86  }
    87  
    88  // ExtractINFromOR will add additional predicated to an OR.
    89  // this rewriter should not be used in a fixed point way, since it returns the original expression with additions,
    90  // and it will therefor OOM before it stops rewriting
    91  func ExtractINFromOR(expr *OrExpr) []Expr {
    92  	// we check if we have two comparisons on either side of the OR
    93  	// that we can add as an ANDed comparison.
    94  	// WHERE (a = 5 and B) or (a = 6 AND C) =>
    95  	// WHERE (a = 5 AND B) OR (a = 6 AND C) AND a IN (5,6)
    96  	// This rewrite makes it possible to find a better route than Scatter if the `a` column has a helpful vindex
    97  	lftPredicates := SplitAndExpression(nil, expr.Left)
    98  	rgtPredicates := SplitAndExpression(nil, expr.Right)
    99  	var ins []Expr
   100  	for _, lft := range lftPredicates {
   101  		l, ok := lft.(*ComparisonExpr)
   102  		if !ok {
   103  			continue
   104  		}
   105  		for _, rgt := range rgtPredicates {
   106  			r, ok := rgt.(*ComparisonExpr)
   107  			if !ok {
   108  				continue
   109  			}
   110  			in, state := tryTurningOrIntoIn(l, r)
   111  			if state.changed() {
   112  				ins = append(ins, in)
   113  			}
   114  		}
   115  	}
   116  
   117  	return uniquefy(ins)
   118  }
   119  
   120  func simplifyOr(expr *OrExpr) (Expr, rewriteState) {
   121  	or := expr
   122  
   123  	// first we search for ANDs and see how they can be simplified
   124  	land, lok := or.Left.(*AndExpr)
   125  	rand, rok := or.Right.(*AndExpr)
   126  	switch {
   127  	case lok && rok:
   128  		// (<> AND <>) OR (<> AND <>)
   129  		var a, b, c Expr
   130  		var change changed
   131  		switch {
   132  		case Equals.Expr(land.Left, rand.Left):
   133  			change = newChange("(A and B) or (A and C) => A AND (B OR C)", f(expr))
   134  			a, b, c = land.Left, land.Right, rand.Right
   135  		case Equals.Expr(land.Left, rand.Right):
   136  			change = newChange("(A and B) or (C and A) => A AND (B OR C)", f(expr))
   137  			a, b, c = land.Left, land.Right, rand.Left
   138  		case Equals.Expr(land.Right, rand.Left):
   139  			change = newChange("(B and A) or (A and C) => A AND (B OR C)", f(expr))
   140  			a, b, c = land.Right, land.Left, rand.Right
   141  		case Equals.Expr(land.Right, rand.Right):
   142  			change = newChange("(B and A) or (C and A) => A AND (B OR C)", f(expr))
   143  			a, b, c = land.Right, land.Left, rand.Left
   144  		default:
   145  			return expr, noChange{}
   146  		}
   147  		return &AndExpr{Left: a, Right: &OrExpr{Left: b, Right: c}}, change
   148  	case lok:
   149  		// (<> AND <>) OR <>
   150  		// Simplification
   151  		if Equals.Expr(or.Right, land.Left) || Equals.Expr(or.Right, land.Right) {
   152  			return or.Right, newChange("(A AND B) OR A => A", f(expr))
   153  		}
   154  		// Distribution Law
   155  		return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}},
   156  			newChange("(A AND B) OR C => (A OR C) AND (B OR C)", f(expr))
   157  	case rok:
   158  		// <> OR (<> AND <>)
   159  		// Simplification
   160  		if Equals.Expr(or.Left, rand.Left) || Equals.Expr(or.Left, rand.Right) {
   161  			return or.Left, newChange("A OR (A AND B) => A", f(expr))
   162  		}
   163  		// Distribution Law
   164  		return &AndExpr{
   165  				Left:  &OrExpr{Left: or.Left, Right: rand.Left},
   166  				Right: &OrExpr{Left: or.Left, Right: rand.Right},
   167  			},
   168  			newChange("C OR (A AND B) => (C OR A) AND (C OR B)", f(expr))
   169  	}
   170  
   171  	// next, we want to try to turn multiple ORs into an IN when possible
   172  	lftCmp, lok := or.Left.(*ComparisonExpr)
   173  	rgtCmp, rok := or.Right.(*ComparisonExpr)
   174  	if lok && rok {
   175  		newExpr, rewritten := tryTurningOrIntoIn(lftCmp, rgtCmp)
   176  		if rewritten.changed() {
   177  			return newExpr, rewritten
   178  		}
   179  	}
   180  
   181  	// Try to make distinct
   182  	return distinctOr(expr)
   183  }
   184  
   185  func tryTurningOrIntoIn(l, r *ComparisonExpr) (Expr, rewriteState) {
   186  	// looks for A = X OR A = Y and turns them into A IN (X, Y)
   187  	col, ok := l.Left.(*ColName)
   188  	if !ok || !Equals.Expr(col, r.Left) {
   189  		return nil, noChange{}
   190  	}
   191  
   192  	var tuple ValTuple
   193  	var ruleStr string
   194  	switch l.Operator {
   195  	case EqualOp:
   196  		tuple = ValTuple{l.Right}
   197  		ruleStr = "A = <>"
   198  	case InOp:
   199  		lft, ok := l.Right.(ValTuple)
   200  		if !ok {
   201  			return nil, noChange{}
   202  		}
   203  		tuple = lft
   204  		ruleStr = "A IN (<>, <>)"
   205  	default:
   206  		return nil, noChange{}
   207  	}
   208  
   209  	ruleStr += " OR "
   210  
   211  	switch r.Operator {
   212  	case EqualOp:
   213  		tuple = append(tuple, r.Right)
   214  		ruleStr += "A = <>"
   215  	case InOp:
   216  		lft, ok := r.Right.(ValTuple)
   217  		if !ok {
   218  			return nil, noChange{}
   219  		}
   220  		tuple = append(tuple, lft...)
   221  		ruleStr += "A IN (<>, <>)"
   222  	default:
   223  		return nil, noChange{}
   224  	}
   225  
   226  	ruleStr += " => A IN (<>, <>)"
   227  
   228  	return &ComparisonExpr{
   229  		Operator: InOp,
   230  		Left:     col,
   231  		Right:    uniquefy(tuple),
   232  	}, newChange(ruleStr, f(&OrExpr{Left: l, Right: r}))
   233  }
   234  
   235  func uniquefy(tuple ValTuple) (output ValTuple) {
   236  outer:
   237  	for _, expr := range tuple {
   238  		for _, seen := range output {
   239  			if Equals.Expr(expr, seen) {
   240  				continue outer
   241  			}
   242  		}
   243  		output = append(output, expr)
   244  	}
   245  	return
   246  }
   247  
   248  func simplifyXor(expr *XorExpr) (Expr, rewriteState) {
   249  	// DeMorgan Rewriter
   250  	return &AndExpr{
   251  		Left:  &OrExpr{Left: expr.Left, Right: expr.Right},
   252  		Right: &NotExpr{Expr: &AndExpr{Left: expr.Left, Right: expr.Right}},
   253  	}, newChange("(A XOR B) => (A OR B) AND NOT (A AND B)", f(expr))
   254  }
   255  
   256  func simplifyAnd(expr *AndExpr) (Expr, rewriteState) {
   257  	res, rewritten := distinctAnd(expr)
   258  	if rewritten.changed() {
   259  		return res, rewritten
   260  	}
   261  	and := expr
   262  	if or, ok := and.Left.(*OrExpr); ok {
   263  		// Simplification
   264  
   265  		if Equals.Expr(or.Left, and.Right) {
   266  			return and.Right, newChange("(A OR B) AND A => A", f(expr))
   267  		}
   268  		if Equals.Expr(or.Right, and.Right) {
   269  			return and.Right, newChange("(A OR B) AND B => B", f(expr))
   270  		}
   271  	}
   272  	if or, ok := and.Right.(*OrExpr); ok {
   273  		// Simplification
   274  		if Equals.Expr(or.Left, and.Left) {
   275  			return and.Left, newChange("A AND (A OR B) => A", f(expr))
   276  		}
   277  		if Equals.Expr(or.Right, and.Left) {
   278  			return and.Left, newChange("A AND (B OR A) => A", f(expr))
   279  		}
   280  	}
   281  
   282  	return expr, noChange{}
   283  }
   284  
   285  func distinctOr(in *OrExpr) (Expr, rewriteState) {
   286  	var skipped []*OrExpr
   287  	todo := []*OrExpr{in}
   288  	var leaves []Expr
   289  	for len(todo) > 0 {
   290  		curr := todo[0]
   291  		todo = todo[1:]
   292  		addAnd := func(in Expr) {
   293  			and, ok := in.(*OrExpr)
   294  			if ok {
   295  				todo = append(todo, and)
   296  			} else {
   297  				leaves = append(leaves, in)
   298  			}
   299  		}
   300  		addAnd(curr.Left)
   301  		addAnd(curr.Right)
   302  	}
   303  	original := len(leaves)
   304  	var predicates []Expr
   305  
   306  outer1:
   307  	for len(leaves) > 0 {
   308  		curr := leaves[0]
   309  		leaves = leaves[1:]
   310  		for _, alreadyIn := range predicates {
   311  			if Equals.Expr(alreadyIn, curr) {
   312  				if log.V(0) {
   313  					skipped = append(skipped, &OrExpr{Left: alreadyIn, Right: curr})
   314  				}
   315  				continue outer1
   316  			}
   317  		}
   318  		predicates = append(predicates, curr)
   319  	}
   320  	if original == len(predicates) {
   321  		return in, noChange{}
   322  	}
   323  	var result Expr
   324  	for i, curr := range predicates {
   325  		if i == 0 {
   326  			result = curr
   327  			continue
   328  		}
   329  		result = &OrExpr{Left: result, Right: curr}
   330  	}
   331  
   332  	return result, newChange("A OR A => A", func() Expr {
   333  		var result Expr
   334  		for _, orExpr := range skipped {
   335  			if result == nil {
   336  				result = orExpr
   337  				continue
   338  			}
   339  
   340  			result = &OrExpr{
   341  				Left:  result,
   342  				Right: orExpr,
   343  			}
   344  		}
   345  		return result
   346  	})
   347  }
   348  
   349  func distinctAnd(in *AndExpr) (Expr, rewriteState) {
   350  	var skipped []*AndExpr
   351  	todo := []*AndExpr{in}
   352  	var leaves []Expr
   353  	for len(todo) > 0 {
   354  		curr := todo[0]
   355  		todo = todo[1:]
   356  		addExpr := func(in Expr) {
   357  			if and, ok := in.(*AndExpr); ok {
   358  				todo = append(todo, and)
   359  			} else {
   360  				leaves = append(leaves, in)
   361  			}
   362  		}
   363  		addExpr(curr.Left)
   364  		addExpr(curr.Right)
   365  	}
   366  	original := len(leaves)
   367  	var predicates []Expr
   368  
   369  outer1:
   370  	for _, curr := range leaves {
   371  		for _, alreadyIn := range predicates {
   372  			if Equals.Expr(alreadyIn, curr) {
   373  				if log.V(0) {
   374  					skipped = append(skipped, &AndExpr{Left: alreadyIn, Right: curr})
   375  				}
   376  				continue outer1
   377  			}
   378  		}
   379  		predicates = append(predicates, curr)
   380  	}
   381  	if original == len(predicates) {
   382  		return in, noChange{}
   383  	}
   384  	var result Expr
   385  	for i, curr := range predicates {
   386  		if i == 0 {
   387  			result = curr
   388  			continue
   389  		}
   390  		result = &AndExpr{Left: result, Right: curr}
   391  	}
   392  	return AndExpressions(leaves...), newChange("A AND A => A", func() Expr {
   393  		var result Expr
   394  		for _, andExpr := range skipped {
   395  			if result == nil {
   396  				result = andExpr
   397  				continue
   398  			}
   399  
   400  			result = &AndExpr{
   401  				Left:  result,
   402  				Right: andExpr,
   403  			}
   404  		}
   405  		return result
   406  	})
   407  }
   408  
   409  type (
   410  	rewriteState interface {
   411  		changed() bool
   412  	}
   413  	noChange struct{}
   414  
   415  	// changed makes it possible to make sure we have a rule string for each change we do in the expression tree
   416  	changed struct {
   417  		rule string
   418  
   419  		// ExprMatched is a function here so building of this expression can be paid only when we are debug logging
   420  		exprMatched func() Expr
   421  	}
   422  )
   423  
   424  func (noChange) changed() bool { return false }
   425  func (changed) changed() bool  { return true }
   426  
   427  // f returns a function that returns the expression. It's short by design, so it interferes minimally
   428  // used for logging
   429  func f(e Expr) func() Expr {
   430  	return func() Expr { return e }
   431  }
   432  
   433  func printRule(rule string, expr func() Expr) {
   434  	if log.V(10) {
   435  		log.Infof("Rule: %s   ON   %s", rule, String(expr()))
   436  	}
   437  }
   438  
   439  func printExpr(expr SQLNode) {
   440  	if log.V(10) {
   441  		log.Infof("Current: %s", String(expr))
   442  	}
   443  }
   444  
   445  func newChange(rule string, exprMatched func() Expr) changed {
   446  	return changed{
   447  		rule:        rule,
   448  		exprMatched: exprMatched,
   449  	}
   450  }