github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/xform/rules/join.opt (about)

     1  # =============================================================================
     2  # join.opt contains exploration rules for the Join operator.
     3  # =============================================================================
     4  
     5  # CommuteJoin creates a Join with the left and right inputs swapped. This is
     6  # useful for other rules that convert joins to other operators (like merge
     7  # join).
     8  # If any join hints are specified, we keep the order in the query.
     9  [CommuteJoin, Explore]
    10  (InnerJoin | FullJoin $left:* $right:* $on:* $private:*)
    11  =>
    12  ((OpName) $right $left $on (CommuteJoinFlags $private))
    13  
    14  # CommuteLeftJoin creates a Join with the left and right inputs swapped.
    15  # This is symmetric with the CommuteRightJoin normalization rule.
    16  [CommuteLeftJoin, Explore]
    17  (LeftJoin $left:* $right:* $on:* $private:*)
    18  =>
    19  (RightJoin $right $left $on (CommuteJoinFlags $private))
    20  
    21  # CommuteSemiJoin generates an InnerJoin that is equivalent to the SemiJoin.
    22  # SemiJoins impose a partial order on the joining tables. We can convert a
    23  # SemiJoin into an InnerJoin by applying a DistinctOn operator on the selected
    24  # rows of the RHS and then relaxing the partial join order restriction.
    25  #
    26  # This allows the join orders (A SemiJoin B) and (Distinct(B*) InnerJoin A) to be
    27  # both considered by the optimizer. This is useful as a different join order may
    28  # allow other rules to trigger. A common case is that, it would allow the inner
    29  # join to use a lookup join in some cases (For example, a different join order
    30  # would allow the use of a lookup join if A has much higher cardinality than B).
    31  #
    32  # We only do this when the On conditions guarantee that for each row in the LHS
    33  # there is at most one unique matching row in the RHS. We need this because a
    34  # SemiJoin emits a maximum of one row for every matching row in the LHS.
    35  # This is an important difference between the behavior of a SemiJoin and an
    36  # InnerJoin. For each row in the LHS, an InnerJoin emits a matching row for every
    37  # row in the RHS where the conditions are met. For example consider the tables:
    38  #
    39  #   lhs       rhs
    40  # +-----+   +------+
    41  #    1         10
    42  #    2         20
    43  #
    44  # If we do an InnerJoin on the table where the On condition is (lhs < rhs),
    45  # you'll notice that each of the lhs rows are matched twice. And so the output
    46  # of the InnerJoin would contain 2 rows for each matching row in the LHS.
    47  # In order to guarantee that there is at most 1 matching row for every row in
    48  # the LHS, we only commute a SemiJoin into an InnerJoin when the On conditions
    49  # are only composed of equalities.
    50  #
    51  # Note: We only consider the columns of the RHS that are used in the On
    52  # conditions (B* in the example above). And so we can be certain that
    53  # ((Distinct(RHS*) InnerJoin LHS) will have at most 1 matching row for each row
    54  # in the LHS if the On conditions are simple equalities.
    55  #
    56  # Citations: [7] (see section 2.1.1)
    57  [CommuteSemiJoin, Explore]
    58  (SemiJoin
    59      $left:*
    60      $right:*
    61      $on:* & (IsSimpleEquality $on)
    62      $private:* & (NoJoinHints $private)
    63  )
    64  =>
    65  (Project
    66      (InnerJoin
    67          $left
    68          (DistinctOn
    69              $right
    70              []
    71              (MakeGrouping
    72                  (IntersectionCols
    73                      (OutputCols $right)
    74                      (FilterOuterCols $on)
    75                  )
    76                  (EmptyOrdering)
    77              )
    78          )
    79          $on
    80          $private
    81      )
    82      []
    83      (OutputCols $left)
    84  )
    85  
    86  # GenerateMergeJoins creates MergeJoin operators for the join, using the
    87  # interesting orderings property.
    88  [GenerateMergeJoins, Explore]
    89  (JoinNonApply $left:* $right:* $on:* $private:*)
    90  =>
    91  (GenerateMergeJoins (OpName) $left $right $on $private)
    92  
    93  # GenerateLookupJoins creates LookupJoin operators for all indexes (of the Scan
    94  # table) which allow it (including non-covering indexes). See the
    95  # GenerateLookupJoins custom function for more details.
    96  [GenerateLookupJoins, Explore]
    97  (InnerJoin | LeftJoin | SemiJoin | AntiJoin
    98      $left:*
    99      (Scan $scanPrivate:*) & (IsCanonicalScan $scanPrivate)
   100      $on:*
   101      $private:*
   102  )
   103  =>
   104  (GenerateLookupJoins (OpName) $left $scanPrivate $on $private)
   105  
   106  # GenerateGeoLookupJoins creates GeoLookupJoin operators for all geospatial
   107  # indexes (of the Scan table) which allow it. See the GenerateGeoLookupJoins
   108  # custom function for more details.
   109  # TODO(rytaft): Add support for LeftJoin.
   110  [GenerateGeoLookupJoins, Explore]
   111  (InnerJoin | SemiJoin | AntiJoin
   112      $left:*
   113      (Scan $scanPrivate:*) &
   114          (IsCanonicalScan $scanPrivate) &
   115          (HasInvertedIndexes $scanPrivate)
   116      $on:[
   117          ...
   118          (FiltersItem
   119              $fn:(Function) &
   120                  (IsGeoIndexFunction $fn) &
   121                  (HasAllVariableArgs $fn)
   122          )
   123          ...
   124      ]
   125      $private:*
   126  )
   127  =>
   128  (GenerateGeoLookupJoins
   129      (OpName)
   130      $left
   131      $scanPrivate
   132      $on
   133      $private
   134      $fn
   135  )
   136  
   137  # GenerateZigzagJoins creates ZigzagJoin operators for all index pairs (of the
   138  # Scan table) where the prefix column(s) of both indexes is/are fixed to
   139  # constant values in the filters. See comments in GenerateZigzagJoin and
   140  # distsqlrun/zigzagjoiner.go for more details on when a zigzag join can be
   141  # planned.
   142  #
   143  # Zigzag joins are prohibited when the source Scan operator has been configured
   144  # with a row-level locking mode. This is mostly out of convenience so that these
   145  # row-level locking modes don't need to added to the ZigzagJoin operator. There
   146  # doesn't seem to be a strong reason to support this, but if one comes up, it
   147  # should be possible to lift this restriction.
   148  [GenerateZigzagJoins, Explore]
   149  (Select
   150      (Scan $scan:*) & (IsCanonicalScan $scan) & ^(IsLocking $scan)
   151      $filters:*
   152  )
   153  =>
   154  (GenerateZigzagJoins $scan $filters)
   155  
   156  # GenerateInvertedIndexZigzagJoins creates ZigzagJoin operators for inverted
   157  # indexes that can be constrained with two or more distinct constant values.
   158  # Inverted indexes contain one row for each path-to-leaf in a JSON value, so one
   159  # row in the primary index could generate multiple inverted index keys. This
   160  # property can be exploited by zigzag joining on the same inverted index, fixed
   161  # at any two of the JSON paths we are querying for.
   162  #
   163  # Zigzag joins are prohibited when the source Scan operator has been configured
   164  # with a row-level locking mode. This is mostly out of convenience so that these
   165  # row-level locking modes don't need to added to the ZigzagJoin operator. There
   166  # doesn't seem to be a strong reason to support this, but if one comes up, it
   167  # should be possible to lift this restriction.
   168  [GenerateInvertedIndexZigzagJoins, Explore]
   169  (Select
   170      (Scan $scan:*) &
   171          (IsCanonicalScan $scan) &
   172          ^(IsLocking $scan) &
   173          (HasInvertedIndexes $scan)
   174      $filters:*
   175  )
   176  =>
   177  (GenerateInvertedIndexZigzagJoins $scan $filters)
   178  
   179  # GenerateLookupJoinWithFilter creates a LookupJoin alternative for a Join which
   180  # has a Select->Scan combination as its right input. The filter can get merged
   181  # with the ON condition (this is correct for both inner and left join).
   182  [GenerateLookupJoinsWithFilter, Explore]
   183  (InnerJoin | LeftJoin
   184      $left:*
   185      (Select
   186          (Scan $scanPrivate:*) & (IsCanonicalScan $scanPrivate)
   187          $filters:*
   188      )
   189      $on:*
   190      $private:*
   191  )
   192  =>
   193  (GenerateLookupJoins
   194      (OpName)
   195      $left
   196      $scanPrivate
   197      (ConcatFilters $on $filters)
   198      $private
   199  )
   200  
   201  # AssociateJoin applies the rule of join associativity. It converts an
   202  # expression like:
   203  #   (A JOIN B ON A.y = B.y) JOIN C ON B.x = C.x
   204  # to the logically equivalent expression:
   205  #   A JOIN (B JOIN C ON B.x = C.x) ON A.y = B.y
   206  #
   207  # If any of the joins contains a hint, we do not rearrange the joins.
   208  [AssociateJoin, Explore]
   209  (InnerJoin
   210      $left:(InnerJoin
   211          $innerLeft:*
   212          $innerRight:*
   213          $innerOn:*
   214          $innerPrivate:* & (NoJoinHints $innerPrivate)
   215      )
   216      $right:* & (ShouldReorderJoins $left $right)
   217      $on:*
   218      $private:* & (NoJoinHints $private)
   219  )
   220  =>
   221  (InnerJoin
   222      $innerLeft
   223      (InnerJoin
   224          $innerRight
   225          $right
   226          (SortFilters
   227              (ExtractBoundConditions
   228                  $newOn:(MapJoinOpEqualities
   229                      (ConcatFilters $on $innerOn)
   230                      (OutputCols $innerLeft)
   231                      $cols:(OutputCols2 $innerRight $right)
   232                  )
   233                  $cols
   234              )
   235          )
   236          (EmptyJoinPrivate)
   237      )
   238      (SortFilters (ExtractUnboundConditions $newOn $cols))
   239      (EmptyJoinPrivate)
   240  )