github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/rules/join.opt (about)

     1  # =============================================================================
     2  # join.opt contains normalization rules for Join operators.
     3  # =============================================================================
     4  
     5  # CommuteRightJoin converts a RightJoin to a LeftJoin with the left and right
     6  # inputs swapped. This allows other normalization rules to only worry about the
     7  # LeftJoin case.
     8  [CommuteRightJoin, Normalize, HighPriority]
     9  (RightJoin $left:* $right:* $on:* $private:*)
    10  =>
    11  (LeftJoin $right $left $on (CommuteJoinFlags $private))
    12  
    13  # SimplifyJoinFilters works like SimplifySelectFilters, except that it operates
    14  # on Join filters rather than Select filters.
    15  [SimplifyJoinFilters, Normalize, HighPriority]
    16  (Join
    17      $left:*
    18      $right:*
    19      $on:[
    20              ...
    21              $item:(FiltersItem
    22                      (And | True | False | Null | Or)
    23                  ) &
    24                  ^(IsUnsimplifiableOr $item)
    25              ...
    26          ] &
    27          ^(IsFilterFalse $on)
    28      $private:*
    29  )
    30  =>
    31  ((OpName) $left $right (SimplifyFilters $on) $private)
    32  
    33  # DetectJoinContradiction replaces a Join condition with False if it detects a
    34  # contradiction in the filter.
    35  [DetectJoinContradiction, Normalize]
    36  (Join
    37      $left:*
    38      $right:*
    39      [ ... $item:(FiltersItem) & (IsContradiction $item) ... ]
    40      $private:*
    41  )
    42  =>
    43  ((OpName) $left $right [ (FiltersItem (False)) ] $private)
    44  
    45  # PushFilterIntoJoinLeftAndRight pushes a filter into both the left and right
    46  # sides of an InnerJoin or SemiJoin if it can be mapped to use the columns of
    47  # both sides. For example, consider this query:
    48  #
    49  #   SELECT * FROM a JOIN b ON a.x=b.x AND a.y=b.y AND a.x + b.y < 5
    50  #
    51  # In this case, we can map a.x + b.y < 5 to both sides based on the equality
    52  # filters a.x=b.x AND a.y=b.y. For the left side, we can map it to
    53  # a.x + a.y < 5, and for the right side, we can map it to b.x + b.y < 5.
    54  # Given this mapping, we can safely push the filter down to both sides and
    55  # remove it from the ON filters list.
    56  #
    57  # Note that this rule is only applied when the left and right inputs do not have
    58  # outer columns. If they do, then this rule can cause undetectable cycles with
    59  # TryDecorrelateSelect, since the filter is pushed down to both sides, but then
    60  # only pulled up from the right side by TryDecorrelateSelect. For this reason,
    61  # the rule also does not apply to InnerJoinApply or SemiJoinApply.
    62  #
    63  # NOTE: It is important that this rule is first among the join filter push-down
    64  #       rules.
    65  [PushFilterIntoJoinLeftAndRight, Normalize]
    66  (InnerJoin | SemiJoin
    67      $left:* & ^(HasOuterCols $left)
    68      $right:* & ^(HasOuterCols $right)
    69      $on:[
    70          ...
    71          $item:* &
    72              ^(FiltersItem (Eq (Variable) (Variable))) &
    73              (CanMapJoinOpFilter
    74                  $item
    75                  $leftCols:(OutputCols $left)
    76                  $equivFD:(GetEquivFD $on $left $right)
    77              ) &
    78              (CanMapJoinOpFilter
    79                  $item
    80                  $rightCols:(OutputCols $right)
    81                  $equivFD
    82              )
    83          ...
    84      ]
    85      $private:*
    86  )
    87  =>
    88  ((OpName)
    89      (Select
    90          $left
    91          [
    92              (FiltersItem
    93                  (MapJoinOpFilter $item $leftCols $equivFD)
    94              )
    95          ]
    96      )
    97      (Select
    98          $right
    99          [
   100              (FiltersItem
   101                  (MapJoinOpFilter $item $rightCols $equivFD)
   102              )
   103          ]
   104      )
   105      (RemoveFiltersItem $on $item)
   106      $private
   107  )
   108  
   109  # MapFilterIntoJoinLeft maps a filter that is not bound by the left side of
   110  # the join to use the columns from the left side. This will allow
   111  # the filter to be pushed down by the PushFilterIntoJoinLeft rule.
   112  # For example, consider this query:
   113  #
   114  #   SELECT * FROM a INNER JOIN b ON a.x = b.x AND b.x + a.y < 5
   115  #
   116  # In this case, we can map b.x + a.y < 5 to the left side by replacing b.x
   117  # with the equivalent column a.x.
   118  # NOTE: This rule only applies to cases where it is not possible or not safe
   119  #       to map the filter to both sides. If it can be mapped to both sides, it
   120  #       will be handled by PushFilterIntoJoinLeftAndRight (which must be
   121  #       ordered above this rule). For performance reasons, this rule should
   122  #       be ordered before PushFilterIntoJoinLeft (otherwise,
   123  #       PushFilterIntoJoinLeft might need to be applied multiple times).
   124  [MapFilterIntoJoinLeft, Normalize]
   125  (InnerJoin | InnerJoinApply | SemiJoin | SemiJoinApply
   126      $left:* & ^(HasOuterCols $left)
   127      $right:*
   128      $on:[
   129          ...
   130          $item:* &
   131              ^(FiltersItem (Eq (Variable) (Variable))) &
   132              ^(IsBoundBy $item $leftCols:(OutputCols $left)) &
   133              (CanMapJoinOpFilter
   134                  $item
   135                  $leftCols
   136                  $equivFD:(GetEquivFD $on $left $right)
   137              )
   138          ...
   139      ]
   140      $private:*
   141  )
   142  =>
   143  ((OpName)
   144      $left
   145      $right
   146      (ReplaceFiltersItem
   147          $on
   148          $item
   149          (MapJoinOpFilter $item $leftCols $equivFD)
   150      )
   151      $private
   152  )
   153  
   154  # MapFilterIntoJoinRight is symmetric with MapFilterIntoJoinLeft. It maps
   155  # Join filter conditions to use columns from the right side of the join rather
   156  # than the left side. See that rule's comments for more details.
   157  [MapFilterIntoJoinRight, Normalize]
   158  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
   159          | SemiJoinApply | AntiJoin | AntiJoinApply
   160      $left:*
   161      $right:* & ^(HasOuterCols $right)
   162      $on:[
   163          ...
   164          $item:* &
   165              ^(FiltersItem (Eq (Variable) (Variable))) &
   166              ^(IsBoundBy $item $rightCols:(OutputCols $right)) &
   167              (CanMapJoinOpFilter
   168                  $item
   169                  $rightCols
   170                  $equivFD:(GetEquivFD $on $left $right)
   171              )
   172          ...
   173      ]
   174      $private:*
   175  )
   176  =>
   177  ((OpName)
   178      $left
   179      $right
   180      (ReplaceFiltersItem
   181          $on
   182          $item
   183          (MapJoinOpFilter $item $rightCols $equivFD)
   184      )
   185      $private
   186  )
   187  
   188  # MapEqualityIntoJoinLeftAndRight checks whether it is possible to map
   189  # equality conditions in a join to use different variables so that the
   190  # number of conditions crossing both sides of a join are minimized. If so,
   191  # the MapEqualityConditions function performs this mapping to construct new
   192  # filters.
   193  #
   194  # For example, consider this query:
   195  #
   196  #   SELECT * FROM a, b WHERE a.x = b.x AND b.x = a.y;
   197  #
   198  # As written, both equality conditions contain variables from both sides of
   199  # the join. We can rewrite this query, however, so that only one condition
   200  # spans both sides:
   201  #
   202  #   SELECT * FROM a, b WHERE a.x = a.y AND b.x = a.y;
   203  #
   204  # Now the condition a.x = a.y is fully bound by the left side of the join,
   205  # and is available to be pushed down by PushFilterIntoJoinLeft.
   206  #
   207  # See the MapEqualityConditions function for more details.
   208  [MapEqualityIntoJoinLeftAndRight, Normalize]
   209  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
   210          | SemiJoinApply | AntiJoin | AntiJoinApply
   211      $left:* & ^(HasOuterCols $left)
   212      $right:* & ^(HasOuterCols $right)
   213      $on:* &
   214          (CanMapJoinOpEqualities
   215              $on
   216              $leftCols:(OutputCols $left)
   217              $rightCols:(OutputCols $right)
   218          )
   219      $private:*
   220  )
   221  =>
   222  ((OpName)
   223      $left
   224      $right
   225      (MapJoinOpEqualities $on $leftCols $rightCols)
   226      $private
   227  )
   228  
   229  # PushFilterIntoJoinLeft pushes Join filter conditions into the left side of the
   230  # join. This is possible in the case of InnerJoin, as long as the condition has
   231  # no dependencies on the right side of the join. Left and Full joins are not
   232  # eligible, since filtering left rows will change the number of rows in the
   233  # result for those types of joins:
   234  #
   235  #   -- A row with nulls on the right side is returned for a.x=1, a.y=2, b.x=1.
   236  #   SELECT * FROM a LEFT JOIN b ON a.x=b.x AND a.y < 0
   237  #
   238  #   -- But if the filter is incorrectly pushed down, then no row is returned.
   239  #   SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a LEFT JOIN b ON a.x=b.x
   240  #
   241  # In addition, AntiJoin is not eligible for this rule, as illustrated by this
   242  # example:
   243  #
   244  #   -- A row is returned for a.y=2.
   245  #   SELECT * FROM a ANTI JOIN b ON a.y < 0
   246  #
   247  #   -- But if the filter is incorrectly pushed down, then no row is returned.
   248  #   SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a ANTI JOIN b ON True
   249  #
   250  # Citations: [1]
   251  [PushFilterIntoJoinLeft, Normalize]
   252  (InnerJoin | InnerJoinApply | SemiJoin | SemiJoinApply
   253      $left:* & ^(HasOuterCols $left)
   254      $right:*
   255      $on:[
   256          ...
   257          $item:* & (IsBoundBy $item $leftCols:(OutputCols $left))
   258          ...
   259      ]
   260      $private:*
   261  )
   262  =>
   263  ((OpName)
   264      (Select $left (ExtractBoundConditions $on $leftCols))
   265      $right
   266      (ExtractUnboundConditions $on $leftCols)
   267      $private
   268  )
   269  
   270  # PushFilterIntoJoinRight is symmetric with PushFilterIntoJoinLeft. It pushes
   271  # Join filter conditions into the right side of the join rather than into the
   272  # left side. See that rule's comments for more details.
   273  [PushFilterIntoJoinRight, Normalize]
   274  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
   275          | SemiJoinApply | AntiJoin | AntiJoinApply
   276      $left:*
   277      $right:* & ^(HasOuterCols $right)
   278      $on:[
   279          ...
   280          $item:* &
   281              (IsBoundBy $item $rightCols:(OutputCols $right))
   282          ...
   283      ]
   284      $private:*
   285  )
   286  =>
   287  ((OpName)
   288      $left
   289      (Select $right (ExtractBoundConditions $on $rightCols))
   290      (ExtractUnboundConditions $on $rightCols)
   291      $private
   292  )
   293  
   294  # SimplifyLeftJoin reduces a LeftJoin operator to an InnerJoin operator (or a
   295  # FullJoin to a RightJoin) when it's known that every row in the join's left
   296  # input will match at least one row in the right input. Since every row matches,
   297  # NULL-extended rows will never be added by the outer join, and therefore can be
   298  # mapped to an InnerJoin (or RightJoin in case of FullJoin). See
   299  # filtersMatchAllLeftRows comment for conditions in which this rule can match.
   300  #
   301  # Self-join example:
   302  #   SELECT * FROM xy LEFT JOIN xy AS xy2 ON xy.y = xy2.y
   303  #   =>
   304  #   SELECT * FROM xy INNER JOIN xy AS xy2 ON xy.y = xy2.y
   305  #
   306  # Foreign-key example:
   307  #   SELECT * FROM orders o LEFT JOIN customers c ON o.customer_id = c.id
   308  #   =>
   309  #   SELECT * FROM orders o INNER JOIN customers c ON o.customer_id = c.id
   310  [SimplifyLeftJoin, Normalize]
   311  (LeftJoin | LeftJoinApply | FullJoin
   312      $left:*
   313      $right:*
   314      $on:* & (JoinFiltersMatchAllLeftRows $left $right $on)
   315      $private:*
   316  )
   317  =>
   318  (ConstructNonLeftJoin (OpName) $left $right $on $private)
   319  
   320  # SimplifyRightJoin reduces a FullJoin operator to a LeftJoin operator when it's
   321  # known that every row in the join's right input will match at least one row in
   322  # the left input. This rule is symmetric with SimplifyLeftJoin; see that rule
   323  # for more details and examples.
   324  [SimplifyRightJoin, Normalize]
   325  (FullJoin
   326      $left:*
   327      $right:*
   328      $on:* & (JoinFiltersMatchAllLeftRows $right $left $on)
   329      $private:*
   330  )
   331  =>
   332  (LeftJoin $left $right $on $private)
   333  
   334  # EliminateSemiJoin discards a SemiJoin operator when it's known that the right
   335  # input never returns zero rows, and there is no join condition.
   336  [EliminateSemiJoin, Normalize]
   337  (SemiJoin | SemiJoinApply
   338      $left:*
   339      $right:* & ^(CanHaveZeroRows $right)
   340      []
   341  )
   342  =>
   343  $left
   344  
   345  # SimplifyZeroCardinalitySemiJoin converts a SemiJoin operator to an empty
   346  # Values when it's known that the right input never returns any rows.
   347  [SimplifyZeroCardinalitySemiJoin, Normalize]
   348  (SemiJoin | SemiJoinApply
   349      $left:*
   350      $right:* & (HasZeroRows $right)
   351  )
   352  =>
   353  (ConstructEmptyValues (OutputCols $left))
   354  
   355  # EliminateAntiJoin discards an AntiJoin operator when it's known that the right
   356  # input never returns any rows.
   357  [EliminateAntiJoin, Normalize]
   358  (AntiJoin | AntiJoinApply
   359      $left:*
   360      $right:* & (HasZeroRows $right)
   361  )
   362  =>
   363  $left
   364  
   365  # SimplifyZeroCardinalityAntiJoin converts an AntiJoin operator to an empty
   366  # Values when it's known that the right input never returns zero rows, and
   367  # there is no join condition.
   368  [SimplifyZeroCardinalityAntiJoin, Normalize]
   369  (AntiJoin | AntiJoinApply
   370      $left:*
   371      $right:* & ^(CanHaveZeroRows $right)
   372      []
   373  )
   374  =>
   375  (ConstructEmptyValues (OutputCols $left))
   376  
   377  # EliminateJoinNoColsLeft eliminates an InnerJoin with a one row, zero column
   378  # left input set. These can be produced when a Values, scalar GroupBy, or other
   379  # one-row operator's columns are never used.
   380  [EliminateJoinNoColsLeft, Normalize]
   381  (InnerJoin | InnerJoinApply
   382      $left:* & (HasNoCols $left) & (HasOneRow $left)
   383      $right:*
   384      $on:*
   385  )
   386  =>
   387  (Select $right $on)
   388  
   389  # EliminateJoinNoColsRight eliminates an InnerJoin with a one row, zero column
   390  # right input set. These can be produced when a Values, scalar GroupBy, or other
   391  # one-row operator's columns are never used.
   392  [EliminateJoinNoColsRight, Normalize]
   393  (InnerJoin | InnerJoinApply
   394      $left:*
   395      $right:* & (HasNoCols $right) & (HasOneRow $right)
   396      $on:*
   397  )
   398  =>
   399  (Select $left $on)
   400  
   401  # HoistJoinProjectRight lifts a passthrough Project operator from within a Join
   402  # operator's right input to outside the join. This often allows the Project
   403  # operator to be merged with an outer Project. Since Project operators tend to
   404  # prevent other rules from matching, this and other rules try to either push
   405  # them down (to prune columns), or else to pull them up (to get them out of the
   406  # way of other operators).
   407  #
   408  # TODO(andyk): Add other join types.
   409  [HoistJoinProjectRight, Normalize]
   410  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
   411      $left:*
   412      $right:(Project $input:* $projections:[])
   413      $on:*
   414      $private:*
   415  )
   416  =>
   417  (Project
   418      ((OpName) $left $input $on $private)
   419      $projections
   420      (OutputCols2 $left $right)
   421  )
   422  
   423  # HoistJoinProjectLeft is the same as HoistJoinProjectRight, but for the left
   424  # input of the join.
   425  [HoistJoinProjectLeft, Normalize]
   426  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
   427      $left:(Project $input:* $projections:[])
   428      $right:*
   429      $on:*
   430      $private:*
   431  )
   432  =>
   433  (Project
   434      ((OpName) $input $right $on $private)
   435      $projections
   436      (OutputCols2 $left $right)
   437  )
   438  
   439  # SimplifyJoinNotNullEquality simplifies an Is/IsNot equality filter condition
   440  # when it's not possible for it to be null, as in the following case:
   441  #
   442  #   WHERE (a=b) IS NOT False
   443  #
   444  # If a and b are not null, then this can be simplified to:
   445  #
   446  #   WHERE a=b
   447  #
   448  # This pattern can be generated by the NormalizeNotAnyFilter rule, and its
   449  # simplification is necessary for making anti-joins efficient, such as in TPCH
   450  # query 16.
   451  [SimplifyJoinNotNullEquality, Normalize]
   452  (Join
   453      $left:*
   454      $right:*
   455      $on:[
   456          ...
   457          $item:(FiltersItem
   458              $condition:(Is | IsNot
   459                  $eq:(Eq
   460                      # Check whether variable is a not-null column of left or right input.
   461                      (Variable
   462                          $col1:* &
   463                              (IsColNotNull2 $col1 $left $right)
   464                      )
   465                      (Variable
   466                          $col2:* &
   467                              (IsColNotNull2 $col2 $left $right)
   468                      )
   469                  )
   470                  $cnst:(True | False | Null)
   471              )
   472          )
   473          ...
   474      ]
   475      $private:*
   476  )
   477  =>
   478  ((OpName)
   479      $left
   480      $right
   481      (ReplaceFiltersItem
   482          $on
   483          $item
   484          (SimplifyNotNullEquality
   485              $eq
   486              (OpName $condition)
   487              (OpName $cnst)
   488          )
   489      )
   490      $private
   491  )
   492  
   493  # ExtractJoinEqualities finds equality conditions such that one side only
   494  # depends on left columns and the other only on right columns and pushes the
   495  # expressions down into Project operators. The result is a join that has an
   496  # equality constraint, which is much more efficient. For example:
   497  #
   498  #   SELECT * FROM abc JOIN xyz ON a=x+1
   499  #
   500  # This join would be quadratic because we have no equality columns.
   501  # This rule rewrites it as:
   502  #
   503  #   SELECT a,b,c,x,y,z FROM abc JOIN (SELECT *, x+1 AS x1 FROM xyz) ON a=x1
   504  #
   505  # This join can use hash join or lookup on the equality columns.
   506  #
   507  # Depending on the expressions involved, one or both sides require a projection.
   508  [ExtractJoinEqualities, Normalize]
   509  (JoinNonApply
   510      $left:* & ^(HasOuterCols $left)
   511      $right:* & ^(HasOuterCols $right)
   512      $on:[
   513          ...
   514          $item:(FiltersItem
   515              (Eq $a:^(ConstValue) $b:^(ConstValue)) &
   516                  (CanExtractJoinEquality
   517                      $a
   518                      $b
   519                      (OutputCols $left)
   520                      (OutputCols $right)
   521                  )
   522          )
   523          ...
   524      ]
   525      $private:*
   526  )
   527  =>
   528  (ExtractJoinEquality (OpName) $left $right $on $item $private)
   529  
   530  # SortFiltersInJoin ensures that any filters in an inner join are canonicalized
   531  # by sorting them.
   532  [SortFiltersInJoin, Normalize]
   533  (InnerJoin
   534      $left:*
   535      $right:*
   536      $on:* & ^(AreFiltersSorted $on)
   537      $private:*
   538  )
   539  =>
   540  (InnerJoin $left $right (SortFilters $on) $private)