github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/rules/select.opt

github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/rules/select.opt (about)

     1  # =============================================================================
     2  # select.opt contains normalization rules for the Select operator.
     3  # =============================================================================
     4  
     5  # SimplifySelectFilters simplifies the Filters operator in several possible
     6  # ways:
     7  #   - Removes True operands
     8  #   - Replaces the Filters operator with False if any operand is False or Null
     9  #   - Flattens nested And operands by merging their conditions into parent
    10  #   - Simplifies Or operands where one side is a Null to the other side
    11  #
    12  # Note that the Null handling behavior is different than the SimplifyAnd rules,
    13  # because Filters only appears as a Select or Join filter condition, both of
    14  # which treat a Null filter conjunct exactly as if it were False.
    15  [SimplifySelectFilters, Normalize, HighPriority]
    16  (Select
    17      $input:*
    18      $filters:[
    19              ...
    20              $item:(FiltersItem
    21                      (And | True | False | Null | Or)
    22                  ) &
    23                  ^(IsUnsimplifiableOr $item)
    24              ...
    25          ] &
    26          ^(IsFilterFalse $filters)
    27  )
    28  =>
    29  (Select $input (SimplifyFilters $filters))
    30  
    31  # ConsolidateSelectFilters consolidates filters that constrain a single
    32  # variable. For example, filters x >= 5 and x <= 10 would be combined into a
    33  # single Range operation.
    34  #
    35  # The benefit of consolidating these filters is it allows a single constraint
    36  # to be generated for the variable instead of multiple. In the example above,
    37  # we can generate the single constraint [/5 - /10] instead of the two
    38  # constraints [/5 - ] and [ - /10]. The single constraint allows us to better
    39  # estimate the selectivity of the predicate when calculating statistics for
    40  # the Select expression.
    41  #
    42  # This rule is low priority so other rules in this file such as
    43  # RemoveNotNullCondition can run first.
    44  [ConsolidateSelectFilters, Normalize, LowPriority]
    45  (Select $input:* $filters:* & (CanConsolidateFilters $filters))
    46  =>
    47  (Select $input (ConsolidateFilters $filters))
    48  
    49  # DetectSelectContradiction replaces a Select with an empty Values if it detects
    50  # a contradiction in the filter.
    51  [DetectSelectContradiction, Normalize]
    52  (Select
    53      $input:*
    54      [ ... $item:(FiltersItem) & (IsContradiction $item) ... ]
    55  )
    56  =>
    57  (ConstructEmptyValues (OutputCols $input))
    58  
    59  # EliminateSelect discards an unnecessary Select operator in the case where its
    60  # filter is always true. Keep this near the top of the file so that it tends to
    61  # be checked early.
    62  [EliminateSelect, Normalize]
    63  (Select $input:* [])
    64  =>
    65  $input
    66  
    67  # MergeSelects combines two nested Select operators into a single Select that
    68  # ANDs the filter conditions of the two Selects.
    69  [MergeSelects, Normalize]
    70  (Select (Select $input:* $innerFilters:*) $filters:*)
    71  =>
    72  (Select $input (ConcatFilters $innerFilters $filters))
    73  
    74  # PushSelectIntoProject pushes the Select operator into its Project input. This
    75  # is typically preferable because it minimizes the number of rows which Project
    76  # needs to process. This is especially important if Project is adding expensive
    77  # computed columns.
    78  [PushSelectIntoProject, Normalize]
    79  (Select
    80      (Project $input:* $projections:* $passthrough:*)
    81      $filters:[
    82          ...
    83          $item:* &
    84              (IsBoundBy $item $inputCols:(OutputCols $input))
    85          ...
    86      ]
    87  )
    88  =>
    89  (Select
    90      (Project
    91          (Select
    92              $input
    93              (ExtractBoundConditions $filters $inputCols)
    94          )
    95          $projections
    96          $passthrough
    97      )
    98      (ExtractUnboundConditions $filters $inputCols)
    99  )
   100  
   101  # MergeSelectInnerJoin merges a Select operator with an InnerJoin input by
   102  # AND'ing the filter conditions of each and creating a new InnerJoin with that
   103  # On condition. This is only safe to do with InnerJoin in the general case
   104  # where the conditions could filter either left or right rows. The special case
   105  # where a condition filters only one or the other is already taken care of by
   106  # the PushSelectIntoJoin rules.
   107  # NOTE: Keep this rule ordered before the PushSelectIntoJoin rules to avoid
   108  #       missing out on the potential for new filter inference based on
   109  #       equivalent columns.
   110  [MergeSelectInnerJoin, Normalize]
   111  (Select
   112      $input:(InnerJoin | InnerJoinApply
   113          $left:*
   114          $right:*
   115          $on:*
   116          $private:*
   117      )
   118      $filters:*
   119  )
   120  =>
   121  ((OpName $input)
   122      $left
   123      $right
   124      (ConcatFilters $on $filters)
   125      $private
   126  )
   127  
   128  # PushSelectCondLeftIntoJoinLeftAndRight applies to the case when a condition
   129  # bound by the left side of a join can be mapped to the right side using
   130  # equality columns from the ON condition of the join. It pushes the original
   131  # filter to the left side, and the mapped filter to the right side.
   132  # For example, consider this query:
   133  #
   134  #   SELECT * FROM l LEFT JOIN r ON l.x = r.x WHERE l.x = 5;
   135  #
   136  # This can safely be converted to:
   137  #
   138  #   SELECT * FROM (SELECT * FROM l WHERE l.x = 5)
   139  #   LEFT JOIN (SELECT * FROM r WHERE r.x = 5) ON l.x = r.x;
   140  #
   141  # It's not normally correct to push filters from the SELECT clause to
   142  # the right side of a LEFT JOIN, since those rows might still show up
   143  # in the output as NULL-extended rows from the left side. In this case,
   144  # however, for any rows removed from the right side, the matching rows are
   145  # also removed from the left side (and thus removed from the output).
   146  # To ensure that this is the case, it's important that the filter only refers
   147  # to columns on the left side that have corresponding equivalent columns on
   148  # the right side.
   149  [PushSelectCondLeftIntoJoinLeftAndRight, Normalize]
   150  (Select
   151      $input:(LeftJoin | LeftJoinApply | SemiJoin | SemiJoinApply
   152              | AntiJoin | AntiJoinApply
   153          $left:*
   154          $right:*
   155          $on:*
   156          $private:*
   157      )
   158      $filters:[
   159          ...
   160          $item:(FiltersItem $condition:*) &
   161              (IsBoundBy $item (OutputCols $left)) &
   162              (CanMapJoinOpFilter
   163                  $item
   164                  $rightCols:(OutputCols $right)
   165                  $equivFD:(GetEquivFD $on $left $right)
   166              )
   167          ...
   168      ]
   169  )
   170  =>
   171  (Select
   172      ((OpName $input)
   173          (Select $left [ (FiltersItem $condition) ])
   174          (Select
   175              $right
   176              [
   177                  (FiltersItem
   178                      (MapJoinOpFilter $item $rightCols $equivFD)
   179                  )
   180              ]
   181          )
   182          $on
   183          $private
   184      )
   185      (RemoveFiltersItem $filters $item)
   186  )
   187  
   188  # PushSelectIntoJoinLeft pushes Select filter conditions into the left side of
   189  # an input Join. This is possible in the case of InnerJoin, LeftJoin, SemiJoin,
   190  # and AntiJoin, as long as the condition has no dependencies on the right side
   191  # of the join. Right and Full joins are not eligible, since attempting to filter
   192  # left rows would just result in NULL left rows instead.
   193  #
   194  #   -- No row is returned for a.x=1, a.y=2, b.x=1, since the WHERE excludes it.
   195  #   SELECT * FROM a RIGHT JOIN b ON a.x=b.x WHERE a.y < 0
   196  #
   197  #   -- But if the filter is incorrectly pushed down in RIGHT/FULL JOIN case,
   198  #   -- then a row containing null values on the left side is returned.
   199  #   SELECT * FROM (SELECT * FROM a WHERE a.y < 0) a RIGHT JOIN b ON a.x=b.x
   200  #
   201  # Citations: [1]
   202  [PushSelectIntoJoinLeft, Normalize]
   203  (Select
   204      $input:(LeftJoin | LeftJoinApply | SemiJoin | SemiJoinApply
   205              | AntiJoin | AntiJoinApply
   206          $left:*
   207          $right:*
   208          $on:*
   209          $private:*
   210      )
   211      $filters:[
   212          ...
   213          $item:* & (IsBoundBy $item $leftCols:(OutputCols $left))
   214          ...
   215      ]
   216  )
   217  =>
   218  (Select
   219      ((OpName $input)
   220          (Select
   221              $left
   222              (ExtractBoundConditions $filters $leftCols)
   223          )
   224          $right
   225          $on
   226          $private
   227      )
   228      (ExtractUnboundConditions $filters $leftCols)
   229  )
   230  
   231  # PushSelectIntoGroupBy pushes a Select condition below a GroupBy in the case
   232  # where it only references grouping columns or ConstAgg columns.
   233  #
   234  # This rule doesn't work on ScalarGroupBy which exhibits different behavior if
   235  # the input is empty:
   236  #   SELECT MAX(y) FROM a
   237  #
   238  # If "a" is empty, this returns a single row containing a null value. This is
   239  # different behavior than a GroupBy with grouping columns, which would return
   240  # the empty set for a similar query:
   241  #   SELECT MAX(y) FROM a GROUP BY x
   242  #
   243  # Citations: [2]
   244  #
   245  # Note: Do not add EnsureDistinctOn to the match pattern. Pushing the select
   246  # filters through the EnsureDistinctOn can prevent it from detecting duplicate
   247  # rows and therefore change error behavior.
   248  [PushSelectIntoGroupBy, Normalize]
   249  (Select
   250      $input:(GroupBy | DistinctOn
   251          $groupingInput:*
   252          $aggregations:*
   253          $groupingPrivate:*
   254      )
   255      $filters:[
   256          ...
   257          $item:* &
   258              (IsBoundBy
   259                  $item
   260                  $passthrough:(GroupingAndConstCols
   261                      $groupingPrivate
   262                      $aggregations
   263                  )
   264              )
   265          ...
   266      ]
   267  )
   268  =>
   269  (Select
   270      ((OpName $input)
   271          (Select
   272              $groupingInput
   273              (ExtractBoundConditions $filters $passthrough)
   274          )
   275          $aggregations
   276          $groupingPrivate
   277      )
   278      (ExtractUnboundConditions $filters $passthrough)
   279  )
   280  
   281  # RemoveNotNullCondition removes a filter with an IS NOT NULL condition
   282  # when the given column has a NOT NULL constraint.
   283  [RemoveNotNullCondition, Normalize]
   284  (Select
   285      $input:*
   286      $filters:[
   287          ...
   288          $item:(FiltersItem
   289              (IsNot
   290                  (Variable $col:* & (IsColNotNull $col $input))
   291                  (Null)
   292              )
   293          )
   294          ...
   295      ]
   296  )
   297  =>
   298  (Select $input (RemoveFiltersItem $filters $item))
   299  
   300  # PushSelectIntoProjectSet pushes filters into a ProjectSet. In particular,
   301  # the filters that are bound to the input columns of the ProjectSet are
   302  # pushed down into it, in hopes of being pushed down further into joins
   303  # and scans underneath the ProjectSet.
   304  [PushSelectIntoProjectSet, Normalize]
   305  (Select
   306      (ProjectSet $input:* $zip:*)
   307      $filters:[
   308          ...
   309          $item:* &
   310              (IsBoundBy $item $inputCols:(OutputCols $input))
   311          ...
   312      ]
   313  )
   314  =>
   315  (Select
   316      (ProjectSet
   317          (Select
   318              $input
   319              (ExtractBoundConditions $filters $inputCols)
   320          )
   321          $zip
   322      )
   323      (ExtractUnboundConditions $filters $inputCols)
   324  )
   325  
   326  # PushFilterIntoSetOp pushes filters down to both the left and right sides
   327  # of all set operators. For example, consider this query:
   328  #
   329  #   SELECT * FROM (SELECT x FROM a UNION ALL SELECT y FROM b) WHERE x < 5
   330  #
   331  # In this case, we can map x < 5 to both sides based on the knowledge that
   332  # in the union the x out col corresponds to the x column in the (a) table and
   333  # the y column in the (b) table. Therefore our mapping becomes x < 5 in the
   334  # left side, and y <  5 in the right side. Given this mapping, we can now
   335  # safely push the filter down to both sides as follows:
   336  #
   337  # SELECT * FROM (SELECT x FROM a UNION ALL SELECT y FROM b) WHERE x < 5
   338  # =>
   339  # (SELECT x FROM a WHERE x < 5) UNION ALL (SELECT y FROM b WHERE y < 5)
   340  #
   341  # Pushing (all) the filters down for each of the set operators (Union, Union
   342  # All, Except, Except All, Intersect, Intersect All) is logically equivalent
   343  # to filtering after applying the set operator. Here's some justification for
   344  # this claim:
   345  #
   346  # Notice that each of the set operators this rule applies to, only works on
   347  # union compatible relations. The resulting column set after applying the set
   348  # operator, is also necessarily a subset of the column set of the (any)
   349  # relations it was composed of. And so, any filter applied must have a
   350  # corresponding column in each of the base relations.
   351  #
   352  # This works in the case of Union because a row passing the filter is
   353  # independent of the other rows in its relation. It works in the case of
   354  # Intersect because if a row `a` was filtered from the intersection, it will
   355  # also be filtered from the LHS and RHS. It works in the case of Except because
   356  # if a row is filtered from the RHS, preventing its removal from the LHS, it
   357  # will also have been filtered from the LHS.
   358  #
   359  # Visualization of the rule:
   360  # Let A and B be sets. Let the filter be represented by eliminating some set C.
   361  #
   362  # Union (All):        (A Union B) \ C => (A \ C) Union (B \ C)
   363  # Intersection (All): (A Intersect B) \ C => (A \ C) Intersect (B \ C)
   364  # Except (All):       (A Except B) \ C => (A \ C) Except (B \ C)
   365  #
   366  # We don't push a filter down if it references outer columns because doing so
   367  # prevents decorrelation.
   368  [PushFilterIntoSetOp, Normalize]
   369  (Select
   370      $input:(Set $left:* $right:* $colmap:*)
   371      $filter:[
   372          ...
   373          $item:* &
   374              (CanMapOnSetOp $item) &
   375              (IsBoundBy $item $inputCols:(OutputCols $input))
   376          ...
   377      ]
   378  )
   379  =>
   380  (Select
   381      ((OpName $input)
   382          (Select
   383              $left
   384              [ (FiltersItem (MapSetOpFilterLeft $item $colmap)) ]
   385          )
   386          (Select
   387              $right
   388              [ (FiltersItem (MapSetOpFilterRight $item $colmap)) ]
   389          )
   390          $colmap
   391      )
   392      (RemoveFiltersItem $filter $item)
   393  )