vitess.io/vitess@v0.16.2/go/vt/sqlparser/predicate_rewriting.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sqlparser 18 19 import ( 20 "vitess.io/vitess/go/vt/log" 21 ) 22 23 // RewritePredicate walks the input AST and rewrites any boolean logic into a simpler form 24 // This simpler form is CNF plus logic for extracting predicates from OR, plus logic for turning ORs into IN 25 // Note: In order to re-plan, we need to empty the accumulated metadata in the AST, 26 // so ColName.Metadata will be nil:ed out as part of this rewrite 27 func RewritePredicate(ast SQLNode) SQLNode { 28 for { 29 printExpr(ast) 30 exprChanged := false 31 stopOnChange := func(SQLNode, SQLNode) bool { 32 return !exprChanged 33 } 34 ast = SafeRewrite(ast, stopOnChange, func(cursor *Cursor) bool { 35 e, isExpr := cursor.node.(Expr) 36 if !isExpr { 37 return true 38 } 39 40 rewritten, state := simplifyExpression(e) 41 if ch, isChange := state.(changed); isChange { 42 printRule(ch.rule, ch.exprMatched) 43 exprChanged = true 44 cursor.Replace(rewritten) 45 } 46 47 if col, isCol := cursor.node.(*ColName); isCol { 48 col.Metadata = nil 49 } 50 return !exprChanged 51 }) 52 53 if !exprChanged { 54 return ast 55 } 56 } 57 } 58 59 func simplifyExpression(expr Expr) (Expr, rewriteState) { 60 switch expr := expr.(type) { 61 case *NotExpr: 62 return simplifyNot(expr) 63 case *OrExpr: 64 return simplifyOr(expr) 65 case *XorExpr: 66 return simplifyXor(expr) 67 case *AndExpr: 68 return simplifyAnd(expr) 69 } 70 return expr, noChange{} 71 } 72 73 func simplifyNot(expr *NotExpr) (Expr, rewriteState) { 74 switch child := expr.Expr.(type) { 75 case *NotExpr: 76 return child.Expr, 77 newChange("NOT NOT A => A", f(expr)) 78 case *OrExpr: 79 return &AndExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, 80 newChange("NOT (A OR B) => NOT A AND NOT B", f(expr)) 81 case *AndExpr: 82 return &OrExpr{Right: &NotExpr{Expr: child.Right}, Left: &NotExpr{Expr: child.Left}}, 83 newChange("NOT (A AND B) => NOT A OR NOT B", f(expr)) 84 } 85 return expr, noChange{} 86 } 87 88 // ExtractINFromOR will add additional predicated to an OR. 89 // this rewriter should not be used in a fixed point way, since it returns the original expression with additions, 90 // and it will therefor OOM before it stops rewriting 91 func ExtractINFromOR(expr *OrExpr) []Expr { 92 // we check if we have two comparisons on either side of the OR 93 // that we can add as an ANDed comparison. 94 // WHERE (a = 5 and B) or (a = 6 AND C) => 95 // WHERE (a = 5 AND B) OR (a = 6 AND C) AND a IN (5,6) 96 // This rewrite makes it possible to find a better route than Scatter if the `a` column has a helpful vindex 97 lftPredicates := SplitAndExpression(nil, expr.Left) 98 rgtPredicates := SplitAndExpression(nil, expr.Right) 99 var ins []Expr 100 for _, lft := range lftPredicates { 101 l, ok := lft.(*ComparisonExpr) 102 if !ok { 103 continue 104 } 105 for _, rgt := range rgtPredicates { 106 r, ok := rgt.(*ComparisonExpr) 107 if !ok { 108 continue 109 } 110 in, state := tryTurningOrIntoIn(l, r) 111 if state.changed() { 112 ins = append(ins, in) 113 } 114 } 115 } 116 117 return uniquefy(ins) 118 } 119 120 func simplifyOr(expr *OrExpr) (Expr, rewriteState) { 121 or := expr 122 123 // first we search for ANDs and see how they can be simplified 124 land, lok := or.Left.(*AndExpr) 125 rand, rok := or.Right.(*AndExpr) 126 switch { 127 case lok && rok: 128 // (<> AND <>) OR (<> AND <>) 129 var a, b, c Expr 130 var change changed 131 switch { 132 case Equals.Expr(land.Left, rand.Left): 133 change = newChange("(A and B) or (A and C) => A AND (B OR C)", f(expr)) 134 a, b, c = land.Left, land.Right, rand.Right 135 case Equals.Expr(land.Left, rand.Right): 136 change = newChange("(A and B) or (C and A) => A AND (B OR C)", f(expr)) 137 a, b, c = land.Left, land.Right, rand.Left 138 case Equals.Expr(land.Right, rand.Left): 139 change = newChange("(B and A) or (A and C) => A AND (B OR C)", f(expr)) 140 a, b, c = land.Right, land.Left, rand.Right 141 case Equals.Expr(land.Right, rand.Right): 142 change = newChange("(B and A) or (C and A) => A AND (B OR C)", f(expr)) 143 a, b, c = land.Right, land.Left, rand.Left 144 default: 145 return expr, noChange{} 146 } 147 return &AndExpr{Left: a, Right: &OrExpr{Left: b, Right: c}}, change 148 case lok: 149 // (<> AND <>) OR <> 150 // Simplification 151 if Equals.Expr(or.Right, land.Left) || Equals.Expr(or.Right, land.Right) { 152 return or.Right, newChange("(A AND B) OR A => A", f(expr)) 153 } 154 // Distribution Law 155 return &AndExpr{Left: &OrExpr{Left: land.Left, Right: or.Right}, Right: &OrExpr{Left: land.Right, Right: or.Right}}, 156 newChange("(A AND B) OR C => (A OR C) AND (B OR C)", f(expr)) 157 case rok: 158 // <> OR (<> AND <>) 159 // Simplification 160 if Equals.Expr(or.Left, rand.Left) || Equals.Expr(or.Left, rand.Right) { 161 return or.Left, newChange("A OR (A AND B) => A", f(expr)) 162 } 163 // Distribution Law 164 return &AndExpr{ 165 Left: &OrExpr{Left: or.Left, Right: rand.Left}, 166 Right: &OrExpr{Left: or.Left, Right: rand.Right}, 167 }, 168 newChange("C OR (A AND B) => (C OR A) AND (C OR B)", f(expr)) 169 } 170 171 // next, we want to try to turn multiple ORs into an IN when possible 172 lftCmp, lok := or.Left.(*ComparisonExpr) 173 rgtCmp, rok := or.Right.(*ComparisonExpr) 174 if lok && rok { 175 newExpr, rewritten := tryTurningOrIntoIn(lftCmp, rgtCmp) 176 if rewritten.changed() { 177 return newExpr, rewritten 178 } 179 } 180 181 // Try to make distinct 182 return distinctOr(expr) 183 } 184 185 func tryTurningOrIntoIn(l, r *ComparisonExpr) (Expr, rewriteState) { 186 // looks for A = X OR A = Y and turns them into A IN (X, Y) 187 col, ok := l.Left.(*ColName) 188 if !ok || !Equals.Expr(col, r.Left) { 189 return nil, noChange{} 190 } 191 192 var tuple ValTuple 193 var ruleStr string 194 switch l.Operator { 195 case EqualOp: 196 tuple = ValTuple{l.Right} 197 ruleStr = "A = <>" 198 case InOp: 199 lft, ok := l.Right.(ValTuple) 200 if !ok { 201 return nil, noChange{} 202 } 203 tuple = lft 204 ruleStr = "A IN (<>, <>)" 205 default: 206 return nil, noChange{} 207 } 208 209 ruleStr += " OR " 210 211 switch r.Operator { 212 case EqualOp: 213 tuple = append(tuple, r.Right) 214 ruleStr += "A = <>" 215 case InOp: 216 lft, ok := r.Right.(ValTuple) 217 if !ok { 218 return nil, noChange{} 219 } 220 tuple = append(tuple, lft...) 221 ruleStr += "A IN (<>, <>)" 222 default: 223 return nil, noChange{} 224 } 225 226 ruleStr += " => A IN (<>, <>)" 227 228 return &ComparisonExpr{ 229 Operator: InOp, 230 Left: col, 231 Right: uniquefy(tuple), 232 }, newChange(ruleStr, f(&OrExpr{Left: l, Right: r})) 233 } 234 235 func uniquefy(tuple ValTuple) (output ValTuple) { 236 outer: 237 for _, expr := range tuple { 238 for _, seen := range output { 239 if Equals.Expr(expr, seen) { 240 continue outer 241 } 242 } 243 output = append(output, expr) 244 } 245 return 246 } 247 248 func simplifyXor(expr *XorExpr) (Expr, rewriteState) { 249 // DeMorgan Rewriter 250 return &AndExpr{ 251 Left: &OrExpr{Left: expr.Left, Right: expr.Right}, 252 Right: &NotExpr{Expr: &AndExpr{Left: expr.Left, Right: expr.Right}}, 253 }, newChange("(A XOR B) => (A OR B) AND NOT (A AND B)", f(expr)) 254 } 255 256 func simplifyAnd(expr *AndExpr) (Expr, rewriteState) { 257 res, rewritten := distinctAnd(expr) 258 if rewritten.changed() { 259 return res, rewritten 260 } 261 and := expr 262 if or, ok := and.Left.(*OrExpr); ok { 263 // Simplification 264 265 if Equals.Expr(or.Left, and.Right) { 266 return and.Right, newChange("(A OR B) AND A => A", f(expr)) 267 } 268 if Equals.Expr(or.Right, and.Right) { 269 return and.Right, newChange("(A OR B) AND B => B", f(expr)) 270 } 271 } 272 if or, ok := and.Right.(*OrExpr); ok { 273 // Simplification 274 if Equals.Expr(or.Left, and.Left) { 275 return and.Left, newChange("A AND (A OR B) => A", f(expr)) 276 } 277 if Equals.Expr(or.Right, and.Left) { 278 return and.Left, newChange("A AND (B OR A) => A", f(expr)) 279 } 280 } 281 282 return expr, noChange{} 283 } 284 285 func distinctOr(in *OrExpr) (Expr, rewriteState) { 286 var skipped []*OrExpr 287 todo := []*OrExpr{in} 288 var leaves []Expr 289 for len(todo) > 0 { 290 curr := todo[0] 291 todo = todo[1:] 292 addAnd := func(in Expr) { 293 and, ok := in.(*OrExpr) 294 if ok { 295 todo = append(todo, and) 296 } else { 297 leaves = append(leaves, in) 298 } 299 } 300 addAnd(curr.Left) 301 addAnd(curr.Right) 302 } 303 original := len(leaves) 304 var predicates []Expr 305 306 outer1: 307 for len(leaves) > 0 { 308 curr := leaves[0] 309 leaves = leaves[1:] 310 for _, alreadyIn := range predicates { 311 if Equals.Expr(alreadyIn, curr) { 312 if log.V(0) { 313 skipped = append(skipped, &OrExpr{Left: alreadyIn, Right: curr}) 314 } 315 continue outer1 316 } 317 } 318 predicates = append(predicates, curr) 319 } 320 if original == len(predicates) { 321 return in, noChange{} 322 } 323 var result Expr 324 for i, curr := range predicates { 325 if i == 0 { 326 result = curr 327 continue 328 } 329 result = &OrExpr{Left: result, Right: curr} 330 } 331 332 return result, newChange("A OR A => A", func() Expr { 333 var result Expr 334 for _, orExpr := range skipped { 335 if result == nil { 336 result = orExpr 337 continue 338 } 339 340 result = &OrExpr{ 341 Left: result, 342 Right: orExpr, 343 } 344 } 345 return result 346 }) 347 } 348 349 func distinctAnd(in *AndExpr) (Expr, rewriteState) { 350 var skipped []*AndExpr 351 todo := []*AndExpr{in} 352 var leaves []Expr 353 for len(todo) > 0 { 354 curr := todo[0] 355 todo = todo[1:] 356 addExpr := func(in Expr) { 357 if and, ok := in.(*AndExpr); ok { 358 todo = append(todo, and) 359 } else { 360 leaves = append(leaves, in) 361 } 362 } 363 addExpr(curr.Left) 364 addExpr(curr.Right) 365 } 366 original := len(leaves) 367 var predicates []Expr 368 369 outer1: 370 for _, curr := range leaves { 371 for _, alreadyIn := range predicates { 372 if Equals.Expr(alreadyIn, curr) { 373 if log.V(0) { 374 skipped = append(skipped, &AndExpr{Left: alreadyIn, Right: curr}) 375 } 376 continue outer1 377 } 378 } 379 predicates = append(predicates, curr) 380 } 381 if original == len(predicates) { 382 return in, noChange{} 383 } 384 var result Expr 385 for i, curr := range predicates { 386 if i == 0 { 387 result = curr 388 continue 389 } 390 result = &AndExpr{Left: result, Right: curr} 391 } 392 return AndExpressions(leaves...), newChange("A AND A => A", func() Expr { 393 var result Expr 394 for _, andExpr := range skipped { 395 if result == nil { 396 result = andExpr 397 continue 398 } 399 400 result = &AndExpr{ 401 Left: result, 402 Right: andExpr, 403 } 404 } 405 return result 406 }) 407 } 408 409 type ( 410 rewriteState interface { 411 changed() bool 412 } 413 noChange struct{} 414 415 // changed makes it possible to make sure we have a rule string for each change we do in the expression tree 416 changed struct { 417 rule string 418 419 // ExprMatched is a function here so building of this expression can be paid only when we are debug logging 420 exprMatched func() Expr 421 } 422 ) 423 424 func (noChange) changed() bool { return false } 425 func (changed) changed() bool { return true } 426 427 // f returns a function that returns the expression. It's short by design, so it interferes minimally 428 // used for logging 429 func f(e Expr) func() Expr { 430 return func() Expr { return e } 431 } 432 433 func printRule(rule string, expr func() Expr) { 434 if log.V(10) { 435 log.Infof("Rule: %s ON %s", rule, String(expr())) 436 } 437 } 438 439 func printExpr(expr SQLNode) { 440 if log.V(10) { 441 log.Infof("Current: %s", String(expr)) 442 } 443 } 444 445 func newChange(rule string, exprMatched func() Expr) changed { 446 return changed{ 447 rule: rule, 448 exprMatched: exprMatched, 449 } 450 }