github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/constraint_builder.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package memo 12 13 import ( 14 "regexp" 15 "strings" 16 17 "github.com/cockroachdb/cockroach/pkg/sql/opt" 18 "github.com/cockroachdb/cockroach/pkg/sql/opt/constraint" 19 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 20 "github.com/cockroachdb/cockroach/pkg/sql/types" 21 "github.com/cockroachdb/cockroach/pkg/util/log" 22 "github.com/cockroachdb/errors" 23 ) 24 25 // Convenience aliases to avoid the constraint prefix everywhere. 26 const includeBoundary = constraint.IncludeBoundary 27 const excludeBoundary = constraint.ExcludeBoundary 28 29 var emptyKey = constraint.EmptyKey 30 var unconstrained = constraint.Unconstrained 31 var contradiction = constraint.Contradiction 32 33 // constraintsBuilder is used to create constraints (constraint.Set) from 34 // boolean scalar expressions. The constraints are stored in the logical 35 // properties; in certain cases, they become constraints for relational 36 // operators (e.g. Select). They can also be used to transfer over conditions 37 // between the two sides of a join. 38 // 39 // A constraint is "tight" if it is exactly equivalent to the expression. A 40 // constraint that is not tight is weaker than the expression. 41 type constraintsBuilder struct { 42 md *opt.Metadata 43 evalCtx *tree.EvalContext 44 } 45 46 // buildSingleColumnConstraint creates a constraint set implied by 47 // a binary boolean operator. 48 func (cb *constraintsBuilder) buildSingleColumnConstraint( 49 col opt.ColumnID, op opt.Operator, val opt.Expr, 50 ) (_ *constraint.Set, tight bool) { 51 if op == opt.InOp && CanExtractConstTuple(val) { 52 els := val.(*TupleExpr).Elems 53 keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx} 54 keyCtx.Columns.InitSingle(opt.MakeOrderingColumn(col, false /* descending */)) 55 56 var spans constraint.Spans 57 spans.Alloc(len(els)) 58 var sp constraint.Span 59 for _, child := range els { 60 datum := ExtractConstDatum(child) 61 if !cb.verifyType(col, datum.ResolvedType()) { 62 return unconstrained, false 63 } 64 if datum == tree.DNull { 65 // Ignore NULLs - they can't match any values 66 continue 67 } 68 key := constraint.MakeKey(datum) 69 sp.Init(key, includeBoundary, key, includeBoundary) 70 spans.Append(&sp) 71 } 72 var c constraint.Constraint 73 spans.SortAndMerge(&keyCtx) 74 c.Init(&keyCtx, &spans) 75 return constraint.SingleConstraint(&c), true 76 } 77 78 if opt.IsConstValueOp(val) || CanExtractConstTuple(val) { 79 res, tight := cb.buildSingleColumnConstraintConst(col, op, ExtractConstDatum(val)) 80 if res != unconstrained { 81 return res, tight 82 } 83 } 84 85 // Try to at least deduce a not-null constraint. 86 if opt.BoolOperatorRequiresNotNullArgs(op) { 87 res := cb.notNullSpan(col) 88 // Check if the right-hand side is a variable too (e.g. a > b). 89 if v, ok := val.(*VariableExpr); ok { 90 res = res.Intersect(cb.evalCtx, cb.notNullSpan(v.Col)) 91 } 92 return res, false 93 } 94 95 return unconstrained, false 96 } 97 98 func (cb *constraintsBuilder) buildSingleColumnConstraintConst( 99 col opt.ColumnID, op opt.Operator, datum tree.Datum, 100 ) (_ *constraint.Set, tight bool) { 101 if !cb.verifyType(col, datum.ResolvedType()) { 102 return unconstrained, false 103 } 104 if datum == tree.DNull { 105 switch op { 106 case opt.EqOp, opt.LtOp, opt.GtOp, opt.LeOp, opt.GeOp, opt.NeOp: 107 // The result of this expression is always NULL. Normally, this expression 108 // should have been converted to NULL during type checking; but if the 109 // NULL is coming from a placeholder, that doesn't happen. 110 return contradiction, true 111 112 case opt.IsOp: 113 return cb.eqSpan(col, tree.DNull), true 114 115 case opt.IsNotOp: 116 return cb.notNullSpan(col), true 117 } 118 return unconstrained, false 119 } 120 121 switch op { 122 case opt.EqOp, opt.IsOp: 123 return cb.eqSpan(col, datum), true 124 125 case opt.LtOp, opt.GtOp, opt.LeOp, opt.GeOp: 126 startKey, startBoundary := constraint.MakeKey(tree.DNull), excludeBoundary 127 endKey, endBoundary := emptyKey, includeBoundary 128 k := constraint.MakeKey(datum) 129 switch op { 130 case opt.LtOp: 131 endKey, endBoundary = k, excludeBoundary 132 case opt.LeOp: 133 endKey, endBoundary = k, includeBoundary 134 case opt.GtOp: 135 startKey, startBoundary = k, excludeBoundary 136 case opt.GeOp: 137 startKey, startBoundary = k, includeBoundary 138 } 139 140 return cb.singleSpan(col, startKey, startBoundary, endKey, endBoundary), true 141 142 case opt.NeOp, opt.IsNotOp: 143 // Build constraint that doesn't contain the key: 144 // IsNotOp : [ - key) (key - ] 145 // NeOp : (/NULL - key) (key - ] 146 // 147 // If the key is the minimum possible value for the column type, the span 148 // (/NULL - key) will never contain any values and can be omitted. 149 // 150 // Similarly, if the key is the maximum possible value, the span (key - ] 151 // can be omitted. 152 startKey, startBoundary := emptyKey, includeBoundary 153 if op == opt.NeOp { 154 startKey, startBoundary = constraint.MakeKey(tree.DNull), excludeBoundary 155 } 156 key := constraint.MakeKey(datum) 157 c := contradiction 158 if startKey.IsEmpty() || !datum.IsMin(cb.evalCtx) { 159 c = cb.singleSpan(col, startKey, startBoundary, key, excludeBoundary) 160 } 161 if !datum.IsMax(cb.evalCtx) { 162 other := cb.singleSpan(col, key, excludeBoundary, emptyKey, includeBoundary) 163 c = c.Union(cb.evalCtx, other) 164 } 165 return c, true 166 167 case opt.LikeOp: 168 if s, ok := tree.AsDString(datum); ok { 169 if i := strings.IndexAny(string(s), "_%"); i >= 0 { 170 if i == 0 { 171 // Mask starts with _ or %. 172 return unconstrained, false 173 } 174 c := cb.makeStringPrefixSpan(col, string(s[:i])) 175 // A mask like ABC% is equivalent to restricting the prefix to ABC. 176 // A mask like ABC%Z requires restricting the prefix, but is a stronger 177 // condition. 178 tight := (i == len(s)-1) && s[i] == '%' 179 return c, tight 180 } 181 // No wildcard characters, this is an equality. 182 return cb.eqSpan(col, &s), true 183 } 184 185 case opt.SimilarToOp: 186 // a SIMILAR TO 'foo_*' -> prefix "foo" 187 if s, ok := tree.AsDString(datum); ok { 188 pattern := tree.SimilarEscape(string(s)) 189 if re, err := regexp.Compile(pattern); err == nil { 190 prefix, complete := re.LiteralPrefix() 191 if complete { 192 return cb.eqSpan(col, tree.NewDString(prefix)), true 193 } 194 return cb.makeStringPrefixSpan(col, prefix), false 195 } 196 } 197 } 198 return unconstrained, false 199 } 200 201 // buildConstraintForTupleIn handles the case where we have a tuple IN another 202 // tuple, for instance: 203 // 204 // (a, b, c) IN ((1, 2, 3), (4, 5, 6)) 205 // 206 // This function is a less powerful version of makeSpansForTupleIn, since it 207 // does not operate on a particular index. The <tight> return value indicates 208 // if the spans are exactly equivalent to the expression (and not weaker). 209 // Assumes that ev is an InOp and both children are TupleOps. 210 func (cb *constraintsBuilder) buildConstraintForTupleIn( 211 in *InExpr, 212 ) (_ *constraint.Set, tight bool) { 213 lhs, rhs := in.Left.(*TupleExpr), in.Right.(*TupleExpr) 214 215 // We can only constrain here if every element of rhs is a TupleOp. 216 for _, elem := range rhs.Elems { 217 if elem.Op() != opt.TupleOp { 218 return unconstrained, false 219 } 220 } 221 222 constrainedCols := make([]opt.OrderingColumn, 0, len(lhs.Elems)) 223 colIdxsInLHS := make([]int, 0, len(lhs.Elems)) 224 for i, lelem := range lhs.Elems { 225 if v, ok := lelem.(*VariableExpr); ok { 226 // We can't constrain a column if it's compared to anything besides a constant. 227 allConstant := true 228 for _, relem := range rhs.Elems { 229 // Element must be tuple (checked above). 230 tup := relem.(*TupleExpr) 231 if !opt.IsConstValueOp(tup.Elems[i]) { 232 allConstant = false 233 break 234 } 235 } 236 237 if allConstant { 238 constrainedCols = append( 239 constrainedCols, 240 opt.MakeOrderingColumn(v.Col, false /* descending */), 241 ) 242 colIdxsInLHS = append(colIdxsInLHS, i) 243 } 244 } 245 } 246 247 if len(constrainedCols) == 0 { 248 return unconstrained, false 249 } 250 251 // If any of the LHS entries are not constrained then our constraints are not 252 // tight. 253 tight = (len(constrainedCols) == len(lhs.Elems)) 254 255 keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx} 256 keyCtx.Columns.Init(constrainedCols) 257 var sp constraint.Span 258 var spans constraint.Spans 259 spans.Alloc(len(rhs.Elems)) 260 261 keyCtx.Columns.Init(constrainedCols) 262 for _, elem := range rhs.Elems { 263 // Element must be tuple (checked above). 264 tup := elem.(*TupleExpr) 265 vals := make(tree.Datums, len(colIdxsInLHS)) 266 267 hasNull := false 268 for j := range colIdxsInLHS { 269 constval := tup.Elems[colIdxsInLHS[j]] 270 datum := ExtractConstDatum(constval) 271 if datum == tree.DNull { 272 hasNull = true 273 break 274 } 275 vals[j] = datum 276 } 277 278 // Nothing can match a tuple containing a NULL, so it introduces no 279 // constraints. 280 if hasNull { 281 // TODO(justin): consider redefining "tight" so that this is included in 282 // it. The spans are not "exactly equivalent" in the presence of NULLs, 283 // because of examples like the following: 284 // (x, y) IN ((1, 2), (NULL, 4)) 285 // is not the same as 286 // (x, y) IN ((1, 2)), 287 // because the former is NULL (not false) on (3,4). 288 tight = false 289 continue 290 } 291 292 key := constraint.MakeCompositeKey(vals...) 293 sp.Init(key, constraint.IncludeBoundary, key, constraint.IncludeBoundary) 294 spans.Append(&sp) 295 } 296 297 spans.SortAndMerge(&keyCtx) 298 299 var c constraint.Constraint 300 c.Init(&keyCtx, &spans) 301 con := constraint.SingleConstraint(&c) 302 303 // Now add a constraint for each individual column. This makes extracting 304 // constant columns much simpler. 305 // TODO(justin): remove this when #27018 is resolved. 306 // We already have a constraint starting with the first column: the 307 // multi-column constraint we added above. 308 for i := 1; i < len(colIdxsInLHS); i++ { 309 var spans constraint.Spans 310 keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx} 311 keyCtx.Columns.InitSingle(constrainedCols[i]) 312 for _, elem := range rhs.Elems { 313 // Element must be tuple (checked above). 314 constVal := elem.(*TupleExpr).Elems[colIdxsInLHS[i]] 315 datum := ExtractConstDatum(constVal) 316 key := constraint.MakeKey(datum) 317 var sp constraint.Span 318 sp.Init(key, constraint.IncludeBoundary, key, constraint.IncludeBoundary) 319 spans.Append(&sp) 320 } 321 322 spans.SortAndMerge(&keyCtx) 323 var c constraint.Constraint 324 c.Init(&keyCtx, &spans) 325 con = con.Intersect(cb.evalCtx, constraint.SingleConstraint(&c)) 326 } 327 328 return con, tight 329 } 330 331 func (cb *constraintsBuilder) buildConstraintForTupleInequality( 332 e opt.ScalarExpr, 333 ) (_ *constraint.Set, tight bool) { 334 lhs, rhs := e.Child(0).(*TupleExpr), e.Child(1).(*TupleExpr) 335 if !CanExtractConstDatum(rhs) { 336 return unconstrained, false 337 } 338 339 // Find the longest prefix that has only variables on the left side and only 340 // non-NULL constants on the right side. 341 for i, leftChild := range lhs.Elems { 342 rightChild := rhs.Elems[i] 343 variable, ok := leftChild.(*VariableExpr) 344 if !ok { 345 return unconstrained, false 346 } 347 if !cb.verifyType(variable.Col, rightChild.DataType()) { 348 // We have a mixed-type comparison. 349 return unconstrained, false 350 } 351 if rightChild.Op() == opt.NullOp { 352 // TODO(radu): NULLs are tricky and require special handling; we ignore 353 // the expression for now. 354 return unconstrained, false 355 } 356 } 357 358 datums := make(tree.Datums, len(lhs.Elems)) 359 for i := range datums { 360 datums[i] = ExtractConstDatum(rhs.Elems[i]) 361 } 362 key := constraint.MakeCompositeKey(datums...) 363 364 // less is true if the op is < or <= and false if the op is > or >=. 365 // boundary is inclusive if the op is <= or >= and exclusive if the op 366 // is < or >. 367 var less bool 368 var boundary constraint.SpanBoundary 369 370 switch e.Op() { 371 case opt.NeOp: 372 // TODO(radu) 373 return unconstrained, false 374 case opt.LtOp: 375 less, boundary = true, excludeBoundary 376 case opt.LeOp: 377 less, boundary = true, includeBoundary 378 case opt.GtOp: 379 less, boundary = false, excludeBoundary 380 case opt.GeOp: 381 less, boundary = false, includeBoundary 382 default: 383 panic(errors.AssertionFailedf("unsupported operator type %s", log.Safe(e.Op()))) 384 } 385 // Disallow NULLs on the first column. 386 startKey, startBoundary := constraint.MakeKey(tree.DNull), excludeBoundary 387 endKey, endBoundary := emptyKey, includeBoundary 388 if less { 389 endKey, endBoundary = key, boundary 390 } else { 391 startKey, startBoundary = key, boundary 392 } 393 394 var span constraint.Span 395 span.Init(startKey, startBoundary, endKey, endBoundary) 396 397 keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx} 398 cols := make([]opt.OrderingColumn, len(lhs.Elems)) 399 for i := range cols { 400 v := lhs.Elems[i].(*VariableExpr) 401 cols[i] = opt.MakeOrderingColumn(v.Col, false /* descending */) 402 } 403 keyCtx.Columns.Init(cols) 404 span.PreferInclusive(&keyCtx) 405 return constraint.SingleSpanConstraint(&keyCtx, &span), true 406 } 407 408 func (cb *constraintsBuilder) buildConstraints(e opt.ScalarExpr) (_ *constraint.Set, tight bool) { 409 switch t := e.(type) { 410 case *NullExpr: 411 return contradiction, true 412 413 case *VariableExpr: 414 // (x) is equivalent to (x = TRUE) if x is boolean. 415 if cb.md.ColumnMeta(t.Col).Type.Family() == types.BoolFamily { 416 return cb.buildSingleColumnConstraintConst(t.Col, opt.EqOp, tree.DBoolTrue) 417 } 418 return unconstrained, false 419 420 case *NotExpr: 421 // (NOT x) is equivalent to (x = FALSE) if x is boolean. 422 if v, ok := t.Input.(*VariableExpr); ok { 423 if cb.md.ColumnMeta(v.Col).Type.Family() == types.BoolFamily { 424 return cb.buildSingleColumnConstraintConst(v.Col, opt.EqOp, tree.DBoolFalse) 425 } 426 } 427 return unconstrained, false 428 429 case *AndExpr: 430 cl, tightl := cb.buildConstraints(t.Left) 431 cr, tightr := cb.buildConstraints(t.Right) 432 cl = cl.Intersect(cb.evalCtx, cr) 433 tightl = tightl && tightr 434 return cl, (tightl || cl == contradiction) 435 436 case *OrExpr: 437 cl, tightl := cb.buildConstraints(t.Left) 438 cr, tightr := cb.buildConstraints(t.Right) 439 res := cl.Union(cb.evalCtx, cr) 440 441 // The union may not be "tight" because the new constraint set might 442 // allow combinations of values that the expression does not allow. 443 // 444 // For example, consider the expression: 445 // 446 // (@1 = 4 AND @2 = 6) OR (@1 = 5 AND @2 = 7) 447 // 448 // The resulting constraint set is: 449 // 450 // /1: [/4 - /4] [/5 - /5] 451 // /2: [/6 - /6] [/7 - /7] 452 // 453 // This constraint set is not tight, because it allows values for @1 454 // and @2 that the original expression does not, such as @1=4, @2=7. 455 // 456 // However, there are three cases in which the union constraint set is 457 // tight. 458 // 459 // First, if the left, right, and result sets have a single constraint, 460 // then the result constraint is tight if the left and right are tight. 461 // If there is a single constraint for all three sets, it implies that 462 // the sets involve the same column. Therefore it is safe to determine 463 // the tightness of the union based on the tightness of the left and 464 // right. 465 // 466 // Second, if one of the left or right set is a contradiction, then the 467 // result constraint is tight if the other input set is tight. This is 468 // because contradictions are tight and fully describe the set of 469 // values that the original expression allows - none. 470 // 471 // For example, consider the expression: 472 // 473 // (@1 = 4 AND @1 = 6) OR (@1 = 5 AND @2 = 7) 474 // 475 // The resulting constraint set is: 476 // 477 // /1: [/5 - /5] 478 // /2: [/7 - /7] 479 // 480 // This constraint set is tight, because there are no values for @1 and 481 // @2 that satisfy the set but do not satisfy the expression. 482 // 483 // Third, if both the left and the right set are contradictions, then 484 // the result set is tight. This is because contradictions are tight 485 // and, as explained above, they fully describe the set of values that 486 // satisfy their expression. Note that this third case is generally 487 // covered by the second case, but it's mentioned here for the sake of 488 // explicitness. 489 if cl == contradiction { 490 return res, tightr 491 } 492 if cr == contradiction { 493 return res, tightl 494 } 495 tight := tightl && tightr && cl.Length() == 1 && cr.Length() == 1 && res.Length() == 1 496 return res, tight 497 498 case *RangeExpr: 499 return cb.buildConstraints(t.And) 500 } 501 502 if e.ChildCount() < 2 { 503 return unconstrained, false 504 } 505 506 child0, child1 := e.Child(0), e.Child(1) 507 // Check for an operation where the left-hand side is an 508 // indexed var for this column. 509 510 // Check for tuple operations. 511 if child0.Op() == opt.TupleOp && child1.Op() == opt.TupleOp { 512 switch e.Op() { 513 case opt.LtOp, opt.LeOp, opt.GtOp, opt.GeOp, opt.NeOp: 514 // Tuple inequality. 515 return cb.buildConstraintForTupleInequality(e) 516 517 case opt.InOp: 518 return cb.buildConstraintForTupleIn(e.(*InExpr)) 519 } 520 } 521 if v, ok := child0.(*VariableExpr); ok { 522 return cb.buildSingleColumnConstraint(v.Col, e.Op(), child1) 523 } 524 return unconstrained, false 525 } 526 527 func (cb *constraintsBuilder) singleSpan( 528 col opt.ColumnID, 529 start constraint.Key, 530 startBoundary constraint.SpanBoundary, 531 end constraint.Key, 532 endBoundary constraint.SpanBoundary, 533 ) *constraint.Set { 534 var span constraint.Span 535 span.Init(start, startBoundary, end, endBoundary) 536 keyCtx := constraint.KeyContext{EvalCtx: cb.evalCtx} 537 keyCtx.Columns.InitSingle(opt.MakeOrderingColumn(col, false /* descending */)) 538 span.PreferInclusive(&keyCtx) 539 return constraint.SingleSpanConstraint(&keyCtx, &span) 540 } 541 542 func (cb *constraintsBuilder) notNullSpan(col opt.ColumnID) *constraint.Set { 543 key := constraint.MakeKey(tree.DNull) 544 return cb.singleSpan(col, key, excludeBoundary, emptyKey, includeBoundary) 545 } 546 547 // eqSpan constrains a column to a single value (which can be DNull). 548 func (cb *constraintsBuilder) eqSpan(col opt.ColumnID, value tree.Datum) *constraint.Set { 549 key := constraint.MakeKey(value) 550 return cb.singleSpan(col, key, includeBoundary, key, includeBoundary) 551 } 552 553 // makeStringPrefixSpan constraints a string column to strings having the given prefix. 554 func (cb *constraintsBuilder) makeStringPrefixSpan( 555 col opt.ColumnID, prefix string, 556 ) *constraint.Set { 557 startKey, startBoundary := constraint.MakeKey(tree.NewDString(prefix)), includeBoundary 558 endKey, endBoundary := emptyKey, includeBoundary 559 560 i := len(prefix) - 1 561 for ; i >= 0 && prefix[i] == 0xFF; i-- { 562 } 563 564 // If i < 0, we have a prefix like "\xff\xff\xff"; there is no ending value. 565 if i >= 0 { 566 // A few examples: 567 // prefix -> endValue 568 // ABC -> ABD 569 // ABC\xff -> ABD 570 // ABC\xff\xff -> ABD 571 endVal := []byte(prefix[:i+1]) 572 endVal[i]++ 573 endDatum := tree.NewDString(string(endVal)) 574 endKey = constraint.MakeKey(endDatum) 575 endBoundary = excludeBoundary 576 } 577 return cb.singleSpan(col, startKey, startBoundary, endKey, endBoundary) 578 } 579 580 // verifyType checks that the type of column matches the given type. We disallow 581 // mixed-type comparisons because if they become index constraints, we would 582 // generate incorrect encodings (#4313). 583 func (cb *constraintsBuilder) verifyType(col opt.ColumnID, typ *types.T) bool { 584 return typ.Family() == types.UnknownFamily || cb.md.ColumnMeta(col).Type.Equivalent(typ) 585 }