github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/join_funcs.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package norm 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt" 15 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 16 "github.com/cockroachdb/cockroach/pkg/sql/opt/props" 17 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 18 "github.com/cockroachdb/cockroach/pkg/util" 19 "github.com/cockroachdb/cockroach/pkg/util/log" 20 "github.com/cockroachdb/errors" 21 ) 22 23 // ---------------------------------------------------------------------- 24 // 25 // Join Rules 26 // Custom match and replace functions used with join.opt rules. 27 // 28 // ---------------------------------------------------------------------- 29 30 // EmptyJoinPrivate returns an unset JoinPrivate. 31 func (c *CustomFuncs) EmptyJoinPrivate() *memo.JoinPrivate { 32 return memo.EmptyJoinPrivate 33 } 34 35 // ConstructNonLeftJoin maps a left join to an inner join and a full join to a 36 // right join when it can be proved that the right side of the join always 37 // produces at least one row for every row on the left. 38 func (c *CustomFuncs) ConstructNonLeftJoin( 39 joinOp opt.Operator, left, right memo.RelExpr, on memo.FiltersExpr, private *memo.JoinPrivate, 40 ) memo.RelExpr { 41 switch joinOp { 42 case opt.LeftJoinOp: 43 return c.f.ConstructInnerJoin(left, right, on, private) 44 case opt.LeftJoinApplyOp: 45 return c.f.ConstructInnerJoinApply(left, right, on, private) 46 case opt.FullJoinOp: 47 return c.f.ConstructRightJoin(left, right, on, private) 48 } 49 panic(errors.AssertionFailedf("unexpected join operator: %v", log.Safe(joinOp))) 50 } 51 52 // SimplifyNotNullEquality simplifies an expression of the following form: 53 // 54 // (Is | IsNot (Eq) (True | False | Null)) 55 // 56 // in the case where the Eq expression is guaranteed to never result in null. 57 // The testOp argument must be IsOp or IsNotOp, and the constOp argument must be 58 // TrueOp, FalseOp, or NullOp. 59 func (c *CustomFuncs) SimplifyNotNullEquality( 60 eq opt.ScalarExpr, testOp, constOp opt.Operator, 61 ) opt.ScalarExpr { 62 switch testOp { 63 case opt.IsOp: 64 switch constOp { 65 case opt.TrueOp: 66 return eq 67 case opt.FalseOp: 68 return c.f.ConstructNot(eq) 69 case opt.NullOp: 70 return c.f.ConstructFalse() 71 } 72 73 case opt.IsNotOp: 74 switch constOp { 75 case opt.TrueOp: 76 return c.f.ConstructNot(eq) 77 case opt.FalseOp: 78 return eq 79 case opt.NullOp: 80 return c.f.ConstructTrue() 81 } 82 } 83 panic(errors.AssertionFailedf("invalid ops: %v, %v", testOp, constOp)) 84 } 85 86 // CanMapJoinOpEqualities checks whether it is possible to map equality 87 // conditions in a join to use different variables so that the number of 88 // conditions crossing both sides of a join are minimized. 89 // See canMapJoinOpEquivalenceGroup for details. 90 func (c *CustomFuncs) CanMapJoinOpEqualities( 91 filters memo.FiltersExpr, leftCols, rightCols opt.ColSet, 92 ) bool { 93 var equivFD props.FuncDepSet 94 for i := range filters { 95 equivFD.AddEquivFrom(&filters[i].ScalarProps().FuncDeps) 96 } 97 equivReps := equivFD.EquivReps() 98 99 for col, ok := equivReps.Next(0); ok; col, ok = equivReps.Next(col + 1) { 100 if c.canMapJoinOpEquivalenceGroup(filters, col, leftCols, rightCols, equivFD) { 101 return true 102 } 103 } 104 105 return false 106 } 107 108 // canMapJoinOpEquivalenceGroup checks whether it is possible to map equality 109 // conditions in a join that form an equivalence group to use different 110 // variables so that the number of conditions crossing both sides of a join 111 // are minimized. 112 // 113 // Specifically, it finds the set of columns containing col that forms an 114 // equivalence group in the provided FuncDepSet, equivFD, which should contain 115 // the equivalence dependencies from the filters. It splits that group into 116 // columns from the left and right sides of the join, and checks whether there 117 // are multiple equality conditions in filters that connect the two groups. If 118 // so, canMapJoinOpEquivalenceGroup returns true. 119 func (c *CustomFuncs) canMapJoinOpEquivalenceGroup( 120 filters memo.FiltersExpr, 121 col opt.ColumnID, 122 leftCols, rightCols opt.ColSet, 123 equivFD props.FuncDepSet, 124 ) bool { 125 eqCols := c.GetEquivColsWithEquivType(col, equivFD) 126 127 // To map equality conditions, the equivalent columns must intersect 128 // both sides and must be fully bound by both sides. 129 if !(eqCols.Intersects(leftCols) && 130 eqCols.Intersects(rightCols) && 131 eqCols.SubsetOf(leftCols.Union(rightCols))) { 132 return false 133 } 134 135 // If more than one equality condition connecting columns in the equivalence 136 // group spans both sides of the join, these conditions can be remapped. 137 found := 0 138 for i := range filters { 139 fd := &filters[i].ScalarProps().FuncDeps 140 filterEqCols := fd.ComputeEquivClosure(fd.EquivReps()) 141 if filterEqCols.Intersects(leftCols) && filterEqCols.Intersects(rightCols) && 142 filterEqCols.SubsetOf(eqCols) { 143 found++ 144 if found > 1 { 145 return true 146 } 147 } 148 } 149 150 return false 151 } 152 153 // MapJoinOpEqualities maps all variable equality conditions in filters to 154 // use columns in either leftCols or rightCols where possible. See 155 // canMapJoinOpEquivalenceGroup and mapJoinOpEquivalenceGroup for more info. 156 func (c *CustomFuncs) MapJoinOpEqualities( 157 filters memo.FiltersExpr, leftCols, rightCols opt.ColSet, 158 ) memo.FiltersExpr { 159 var equivFD props.FuncDepSet 160 for i := range filters { 161 equivFD.AddEquivFrom(&filters[i].ScalarProps().FuncDeps) 162 } 163 equivReps := equivFD.EquivReps() 164 165 newFilters := filters 166 equivReps.ForEach(func(col opt.ColumnID) { 167 if c.canMapJoinOpEquivalenceGroup(newFilters, col, leftCols, rightCols, equivFD) { 168 newFilters = c.mapJoinOpEquivalenceGroup(newFilters, col, leftCols, rightCols, equivFD) 169 } 170 }) 171 172 return newFilters 173 } 174 175 // mapJoinOpEquivalenceGroup maps equality conditions in a join that form an 176 // equivalence group to use different variables so that the number of 177 // conditions crossing both sides of a join are minimized. This is useful for 178 // creating additional filter conditions that can be pushed down to either side 179 // of the join. 180 // 181 // To perform the mapping, mapJoinOpEquivalenceGroup finds the set of columns 182 // containing col that forms an equivalence group in filters. The result is 183 // a set of columns that are all equivalent, some on the left side of the join 184 // and some on the right side. mapJoinOpEquivalenceGroup constructs a new set of 185 // equalities that implies the same equivalency group, with the property that 186 // there is a single condition with one left column and one right column. 187 // For example, consider this query: 188 // 189 // SELECT * FROM a, b WHERE a.x = b.x AND a.x = a.y AND a.y = b.y 190 // 191 // It has an equivalence group {a.x, a.y, b.x, b.y}. The columns a.x and a.y 192 // are on the left side, and b.x and b.y are on the right side. Initially there 193 // are two conditions that cross both sides. After mapping, the query would be 194 // converted to: 195 // 196 // SELECT * FROM a, b WHERE a.x = a.y AND b.x = b.y AND a.x = b.x 197 // 198 func (c *CustomFuncs) mapJoinOpEquivalenceGroup( 199 filters memo.FiltersExpr, 200 col opt.ColumnID, 201 leftCols, rightCols opt.ColSet, 202 equivFD props.FuncDepSet, 203 ) memo.FiltersExpr { 204 eqCols := c.GetEquivColsWithEquivType(col, equivFD) 205 206 // First remove all the equality conditions for this equivalence group. 207 newFilters := make(memo.FiltersExpr, 0, len(filters)) 208 for i := range filters { 209 fd := &filters[i].ScalarProps().FuncDeps 210 filterEqCols := fd.ComputeEquivClosure(fd.EquivReps()) 211 if !filterEqCols.Empty() && filterEqCols.SubsetOf(eqCols) { 212 continue 213 } 214 newFilters = append(newFilters, filters[i]) 215 } 216 217 // Now append new equality conditions that imply the same equivalency group, 218 // but only one condition should contain columns from both sides. 219 leftEqCols := leftCols.Intersection(eqCols) 220 rightEqCols := rightCols.Intersection(eqCols) 221 firstLeftCol, ok := leftEqCols.Next(0) 222 if !ok { 223 panic(errors.AssertionFailedf( 224 "mapJoinOpEquivalenceGroup called with equivalence group that does not intersect both sides", 225 )) 226 } 227 firstRightCol, ok := rightEqCols.Next(0) 228 if !ok { 229 panic(errors.AssertionFailedf( 230 "mapJoinOpEquivalenceGroup called with equivalence group that does not intersect both sides", 231 )) 232 } 233 234 // Connect all the columns on the left. 235 for col, ok := leftEqCols.Next(firstLeftCol + 1); ok; col, ok = leftEqCols.Next(col + 1) { 236 newFilters = append(newFilters, c.f.ConstructFiltersItem( 237 c.f.ConstructEq(c.f.ConstructVariable(firstLeftCol), c.f.ConstructVariable(col)), 238 )) 239 } 240 241 // Connect all the columns on the right. 242 for col, ok := rightEqCols.Next(firstRightCol + 1); ok; col, ok = rightEqCols.Next(col + 1) { 243 newFilters = append(newFilters, c.f.ConstructFiltersItem( 244 c.f.ConstructEq(c.f.ConstructVariable(firstRightCol), c.f.ConstructVariable(col)), 245 )) 246 } 247 248 // Connect the two sides. 249 newFilters = append(newFilters, c.f.ConstructFiltersItem( 250 c.f.ConstructEq( 251 c.f.ConstructVariable(firstLeftCol), c.f.ConstructVariable(firstRightCol), 252 ), 253 )) 254 255 return newFilters 256 } 257 258 // CanMapJoinOpFilter returns true if it is possible to map a boolean expression 259 // src, which is a conjunct in the given filters expression, to use the output 260 // columns of the relational expression dst. 261 // 262 // In order for one column to map to another, the two columns must be 263 // equivalent. This happens when there is an equality predicate such as a.x=b.x 264 // in the ON or WHERE clause. Additionally, the two columns must be of the same 265 // type (see GetEquivColsWithEquivType for details). CanMapJoinOpFilter checks 266 // that for each column in src, there is at least one equivalent column in dst. 267 // 268 // For example, consider this query: 269 // 270 // SELECT * FROM a INNER JOIN b ON a.x=b.x AND a.x + b.y = 5 271 // 272 // Since there is an equality predicate on a.x=b.x, it is possible to map 273 // a.x + b.y = 5 to b.x + b.y = 5, and that allows the filter to be pushed down 274 // to the right side of the join. In this case, CanMapJoinOpFilter returns true 275 // when src is a.x + b.y = 5 and dst is (Scan b), but false when src is 276 // a.x + b.y = 5 and dst is (Scan a). 277 // 278 // If src has a correlated subquery, CanMapJoinOpFilter returns false. 279 func (c *CustomFuncs) CanMapJoinOpFilter( 280 src *memo.FiltersItem, dstCols opt.ColSet, equivFD props.FuncDepSet, 281 ) bool { 282 // Fast path if src is already bound by dst. 283 if c.IsBoundBy(src, dstCols) { 284 return true 285 } 286 287 scalarProps := src.ScalarProps() 288 if scalarProps.HasCorrelatedSubquery { 289 return false 290 } 291 292 // For CanMapJoinOpFilter to be true, each column in src must map to at 293 // least one column in dst. 294 for i, ok := scalarProps.OuterCols.Next(0); ok; i, ok = scalarProps.OuterCols.Next(i + 1) { 295 eqCols := c.GetEquivColsWithEquivType(i, equivFD) 296 if !eqCols.Intersects(dstCols) { 297 return false 298 } 299 } 300 301 return true 302 } 303 304 // MapJoinOpFilter maps a boolean expression src, which is a conjunct in 305 // the given filters expression, to use the output columns of the relational 306 // expression dst. 307 // 308 // MapJoinOpFilter assumes that CanMapJoinOpFilter has already returned true, 309 // and therefore a mapping is possible (see comment above CanMapJoinOpFilter 310 // for details). 311 // 312 // For each column in src that is not also in dst, MapJoinOpFilter replaces it 313 // with an equivalent column in dst. If there are multiple equivalent columns 314 // in dst, it chooses one arbitrarily. MapJoinOpFilter does not replace any 315 // columns in subqueries, since we know there are no correlated subqueries 316 // (otherwise CanMapJoinOpFilter would have returned false). 317 // 318 // For example, consider this query: 319 // 320 // SELECT * FROM a INNER JOIN b ON a.x=b.x AND a.x + b.y = 5 321 // 322 // If MapJoinOpFilter is called with src as a.x + b.y = 5 and dst as (Scan b), 323 // it returns b.x + b.y = 5. MapJoinOpFilter should not be called with the 324 // equality predicate a.x = b.x, because it would just return the tautology 325 // b.x = b.x. 326 func (c *CustomFuncs) MapJoinOpFilter( 327 src *memo.FiltersItem, dstCols opt.ColSet, equivFD props.FuncDepSet, 328 ) opt.ScalarExpr { 329 // Fast path if src is already bound by dst. 330 if c.IsBoundBy(src, dstCols) { 331 return src.Condition 332 } 333 334 // Map each column in src to one column in dst. We choose an arbitrary column 335 // (the one with the smallest ColumnID) if there are multiple choices. 336 var colMap util.FastIntMap 337 outerCols := src.ScalarProps().OuterCols 338 for srcCol, ok := outerCols.Next(0); ok; srcCol, ok = outerCols.Next(srcCol + 1) { 339 eqCols := c.GetEquivColsWithEquivType(srcCol, equivFD) 340 eqCols.IntersectionWith(dstCols) 341 if eqCols.Contains(srcCol) { 342 colMap.Set(int(srcCol), int(srcCol)) 343 } else { 344 dstCol, ok := eqCols.Next(0) 345 if !ok { 346 panic(errors.AssertionFailedf( 347 "MapJoinOpFilter called on src that cannot be mapped to dst. src:\n%s\ndst:\n%s", 348 src, dstCols, 349 )) 350 } 351 colMap.Set(int(srcCol), int(dstCol)) 352 } 353 } 354 355 // Recursively walk the scalar sub-tree looking for references to columns 356 // that need to be replaced. 357 var replace ReplaceFunc 358 replace = func(nd opt.Expr) opt.Expr { 359 switch t := nd.(type) { 360 case *memo.VariableExpr: 361 outCol, _ := colMap.Get(int(t.Col)) 362 if int(t.Col) == outCol { 363 // Avoid constructing a new variable if possible. 364 return nd 365 } 366 return c.f.ConstructVariable(opt.ColumnID(outCol)) 367 368 case *memo.SubqueryExpr, *memo.ExistsExpr, *memo.AnyExpr: 369 // There are no correlated subqueries, so we don't need to recurse here. 370 return nd 371 } 372 373 return c.f.Replace(nd, replace) 374 } 375 376 return replace(src.Condition).(opt.ScalarExpr) 377 } 378 379 // GetEquivColsWithEquivType uses the given FuncDepSet to find columns that are 380 // equivalent to col, and returns only those columns that also have the same 381 // type as col. This function is used when inferring new filters based on 382 // equivalent columns, because operations that are valid with one type may be 383 // invalid with a different type. 384 // 385 // In addition, if col has a composite key encoding, we cannot guarantee that 386 // it will be exactly equal to other "equivalent" columns, so in that case we 387 // return a set containing only col. This is a conservative measure to ensure 388 // that we don't infer filters incorrectly. For example, consider this query: 389 // 390 // SELECT * FROM 391 // (VALUES (1.0)) AS t1(x), 392 // (VALUES (1.00)) AS t2(y) 393 // WHERE x=y AND x::text = '1.0'; 394 // 395 // It should return the following result: 396 // 397 // x | y 398 // -----+------ 399 // 1.0 | 1.00 400 // 401 // But if we use the equality predicate x=y to map x to y and infer an 402 // additional filter y::text = '1.0', the query would return nothing. 403 // 404 // TODO(rytaft): In the future, we may want to allow the mapping if the 405 // filter involves a comparison operator, such as x < 5. 406 func (c *CustomFuncs) GetEquivColsWithEquivType( 407 col opt.ColumnID, equivFD props.FuncDepSet, 408 ) opt.ColSet { 409 var res opt.ColSet 410 colType := c.f.Metadata().ColumnMeta(col).Type 411 412 // Don't bother looking for equivalent columns if colType has a composite 413 // key encoding. 414 if sqlbase.HasCompositeKeyEncoding(colType) { 415 res.Add(col) 416 return res 417 } 418 419 // Compute all equivalent columns. 420 eqCols := equivFD.ComputeEquivGroup(col) 421 422 eqCols.ForEach(func(i opt.ColumnID) { 423 // Only include columns that have the same type as col. 424 eqColType := c.f.Metadata().ColumnMeta(i).Type 425 if colType.Equivalent(eqColType) { 426 res.Add(i) 427 } 428 }) 429 430 return res 431 } 432 433 // GetEquivFD gets a FuncDepSet with all equivalence dependencies from 434 // filters, left and right. 435 func (c *CustomFuncs) GetEquivFD( 436 filters memo.FiltersExpr, left, right memo.RelExpr, 437 ) (equivFD props.FuncDepSet) { 438 for i := range filters { 439 equivFD.AddEquivFrom(&filters[i].ScalarProps().FuncDeps) 440 } 441 equivFD.AddEquivFrom(&left.Relational().FuncDeps) 442 equivFD.AddEquivFrom(&right.Relational().FuncDeps) 443 return equivFD 444 } 445 446 // JoinFiltersMatchAllLeftRows returns true when each row in the given join's 447 // left input matches at least one row from the right input, according to the 448 // join filters. 449 func (c *CustomFuncs) JoinFiltersMatchAllLeftRows( 450 left, right memo.RelExpr, on memo.FiltersExpr, 451 ) bool { 452 // Asking whether a join will match all left rows is the same as asking 453 // whether an inner join with the same inputs would filter any rows from its 454 // left input. 455 multiplicity := memo.GetJoinMultiplicityFromInputs(opt.InnerJoinOp, left, right, on) 456 return multiplicity.JoinPreservesLeftRows() 457 } 458 459 // CanExtractJoinEquality returns true if: 460 // - one of a, b is bound by the left columns; 461 // - the other is bound by the right columns; 462 // - a and b are not "bare" variables; 463 // - a and b contain no correlated subqueries; 464 // - neither a or b are constants. 465 // 466 // Such an equality can be converted to a column equality by pushing down 467 // expressions as projections. 468 func (c *CustomFuncs) CanExtractJoinEquality( 469 a, b opt.ScalarExpr, leftCols, rightCols opt.ColSet, 470 ) bool { 471 // Disallow simple equality between variables. 472 if a.Op() == opt.VariableOp && b.Op() == opt.VariableOp { 473 return false 474 } 475 476 // Recursively compute properties for left and right sides. 477 var leftProps, rightProps props.Shared 478 memo.BuildSharedProps(a, &leftProps) 479 memo.BuildSharedProps(b, &rightProps) 480 481 // Disallow cases when one side has a correlated subquery. 482 // TODO(radu): investigate relaxing this. 483 if leftProps.HasCorrelatedSubquery || rightProps.HasCorrelatedSubquery { 484 return false 485 } 486 487 if leftProps.OuterCols.Empty() || rightProps.OuterCols.Empty() { 488 // It's possible for one side to have no outer cols and still not be a 489 // ConstValue (see #44746). 490 return false 491 } 492 493 if (leftProps.OuterCols.SubsetOf(leftCols) && rightProps.OuterCols.SubsetOf(rightCols)) || 494 (leftProps.OuterCols.SubsetOf(rightCols) && rightProps.OuterCols.SubsetOf(leftCols)) { 495 // The equality is of the form: 496 // expression(leftCols) = expression(rightCols) 497 return true 498 } 499 return false 500 } 501 502 // ExtractJoinEquality takes an equality FiltersItem that was identified via a 503 // call to CanExtractJoinEquality, and converts it to an equality on "bare" 504 // variables, by pushing down more complicated expressions as projections. See 505 // the ExtractJoinEqualities rule. 506 func (c *CustomFuncs) ExtractJoinEquality( 507 joinOp opt.Operator, 508 left, right memo.RelExpr, 509 filters memo.FiltersExpr, 510 item *memo.FiltersItem, 511 private *memo.JoinPrivate, 512 ) memo.RelExpr { 513 leftCols := c.OutputCols(left) 514 rightCols := c.OutputCols(right) 515 516 eq := item.Condition.(*memo.EqExpr) 517 a, b := eq.Left, eq.Right 518 519 var eqLeftProps props.Shared 520 memo.BuildSharedProps(eq.Left, &eqLeftProps) 521 if eqLeftProps.OuterCols.SubsetOf(rightCols) { 522 a, b = b, a 523 } 524 525 var leftProj, rightProj projectBuilder 526 leftProj.init(c.f) 527 rightProj.init(c.f) 528 529 newFilters := make(memo.FiltersExpr, len(filters)) 530 for i := range filters { 531 if &filters[i] != item { 532 newFilters[i] = filters[i] 533 continue 534 } 535 536 newFilters[i] = c.f.ConstructFiltersItem( 537 c.f.ConstructEq(leftProj.add(a), rightProj.add(b)), 538 ) 539 } 540 if leftProj.empty() && rightProj.empty() { 541 panic(errors.AssertionFailedf("no equalities to extract")) 542 } 543 544 join := c.f.ConstructJoin( 545 joinOp, 546 leftProj.buildProject(left, leftCols), 547 rightProj.buildProject(right, rightCols), 548 newFilters, 549 private, 550 ) 551 552 // Project away the synthesized columns. 553 return c.f.ConstructProject(join, memo.EmptyProjectionsExpr, leftCols.Union(rightCols)) 554 } 555 556 // CommuteJoinFlags returns a join private for the commuted join (where the left 557 // and right sides are swapped). It adjusts any join flags that are specific to 558 // one side. 559 func (c *CustomFuncs) CommuteJoinFlags(p *memo.JoinPrivate) *memo.JoinPrivate { 560 if p.Flags.Empty() { 561 return p 562 } 563 564 // swap is a helper function which swaps the values of two (single-bit) flags. 565 swap := func(f, a, b memo.JoinFlags) memo.JoinFlags { 566 // If the bits are different, flip them both. 567 if f.Has(a) != f.Has(b) { 568 f ^= (a | b) 569 } 570 return f 571 } 572 f := p.Flags 573 f = swap(f, memo.AllowLookupJoinIntoLeft, memo.AllowLookupJoinIntoRight) 574 f = swap(f, memo.AllowHashJoinStoreLeft, memo.AllowHashJoinStoreRight) 575 if p.Flags == f { 576 return p 577 } 578 res := *p 579 res.Flags = f 580 return &res 581 }