github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/norm/rules/decorrelate.opt (about)

     1  # =============================================================================
     2  # decorrelate.opt contains normalization patterns that try to eliminate
     3  # correlated subqueries. A correlated subquery is a subquery with one or more
     4  # outer columns. For example:
     5  #
     6  #   SELECT * FROM a WHERE (SELECT b.y FROM b WHERE a.x=b.x) < 5
     7  #
     8  # A correlated join has outer columns in its right input that refer to columns
     9  # in its left input. For example:
    10  #
    11  #   SELECT * FROM a INNER JOIN LATERAL (SELECT * FROM b WHERE a.x=b.x)
    12  #
    13  # Normalization rules "hoist" or "pull up" subqueries so that they are directly
    14  # joined with the outer relation to which they are bound. Other patterns try
    15  # to "push down" correlated joins (apply) until they disappear or can no
    16  # longer be pushed further. An apply join can be rewritten as a non-apply join
    17  # once there are no outer columns in the right side of the join that are bound
    18  # by the left side of the join (i.e. the inputs are not "correlated").
    19  #
    20  # Together, these pattens tend to eliminate unnecessary correlation, which has
    21  # the desirable effect of eliminating patterns that can only be executed using
    22  # often expensive nested loops, and instead open up other physical plan
    23  # possibilities.
    24  #
    25  # Citations: [3]
    26  # =============================================================================
    27  
    28  # DecorrelateJoin maps an apply join into the corresponding join without an
    29  # apply if the right side of the join is not correlated with the left side.
    30  # This allows the optimizer to consider additional physical join operators that
    31  # are unable to handle correlated inputs.
    32  #
    33  # NOTE: Keep this before other decorrelation patterns, as if the correlated
    34  #       join can be removed first, it avoids unnecessarily matching other
    35  #       patterns that only exist to get to this pattern.
    36  #
    37  # Citations: [3]
    38  [DecorrelateJoin, Normalize]
    39  (JoinApply
    40      $left:*
    41      $right:* & ^(IsCorrelated $right $left)
    42      $on:*
    43      $private:*
    44  )
    45  =>
    46  (ConstructNonApplyJoin (OpName) $left $right $on $private)
    47  
    48  # DecorrelateProjectSet pulls an input relation outside of a ProjectSet if the
    49  # input is not correlated with any of the functions in the ProjectSet. The
    50  # input is then cross-joined with a new ProjectSet, which contains the same
    51  # functions but has an empty input (a unary VALUES node).
    52  #
    53  # The advantage of this transformation is it means each of the functions in the
    54  # ProjectSet only need to be executed once in total, instead of once for each
    55  # input row.
    56  [DecorrelateProjectSet, Normalize]
    57  (ProjectSet
    58      $input:^(Values)
    59      $zip:* & ^(IsZipCorrelated $zip (OutputCols $input))
    60  )
    61  =>
    62  (InnerJoin
    63      $input
    64      (ProjectSet (ConstructNoColsRow) $zip)
    65      []
    66      (EmptyJoinPrivate)
    67  )
    68  
    69  # TryDecorrelateSelect "pushes down" the join apply into the select operator,
    70  # in order to eliminate any correlation between the select filter list and the
    71  # left side of the join, and also to keep "digging" down to find and eliminate
    72  # other unnecessary correlation. Eventually, the hope is to trigger the
    73  # DecorrelateJoin pattern to turn JoinApply operators into non-apply Join
    74  # operators.
    75  #
    76  # Note that citation [3] doesn't directly contain this identity, since it
    77  # assumes that the Select will be hoisted above the Join rather than becoming
    78  # part of its On condition. PushFilterIntoJoinRight allows the condition to be
    79  # pushed down, so this rule can correctly pull it up.
    80  #
    81  # Citations: [3] (see identity #3)
    82  [TryDecorrelateSelect, Normalize]
    83  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
    84          | SemiJoinApply | AntiJoin | AntiJoinApply
    85      $left:*
    86      $right:* &
    87          (HasOuterCols $right) &
    88          (Select $input:* $filters:*)
    89      $on:*
    90      $private:*
    91  )
    92  =>
    93  ((OpName) $left $input (ConcatFilters $on $filters) $private)
    94  
    95  # TryDecorrelateProject "pushes down" a Join into a Project operator, in an
    96  # attempt to eliminate any correlation between the projection list and the left
    97  # side of the join, and also to keep "digging" down to find and eliminate other
    98  # unnecessary correlation. The eventual hope is to trigger the DecorrelateJoin
    99  # rule to turn a JoinApply operator into a non-apply Join operator.
   100  #
   101  # Citations: [3] (see identity #4)
   102  [TryDecorrelateProject, Normalize]
   103  (InnerJoin | InnerJoinApply
   104      $left:*
   105      $right:* &
   106          (HasOuterCols $right) &
   107          (Project $input:* $projections:* $passthrough:*)
   108      $on:*
   109      $private:*
   110  )
   111  =>
   112  (Select
   113      (Project
   114          ((OpName) $left $input [] $private)
   115          $projections
   116          (UnionCols (OutputCols $left) $passthrough)
   117      )
   118      $on
   119  )
   120  
   121  # TryDecorrelateProjectSelect tries to decorrelate by hoisting a Select operator
   122  # that sits below a LeftJoin/Project operator combo. The Project operator itself
   123  # can't be reordered above the LeftJoin like it can in the InnerJoin case.
   124  # However, the Select filter can be merged with the LeftJoin filter, which is
   125  # enough to decorrelate in several useful cases.
   126  [TryDecorrelateProjectSelect, Normalize]
   127  (LeftJoinApply
   128      $left:*
   129      $right:(Project
   130          (Select
   131              $selectInput:*
   132              $filters:* &
   133                  ^(FiltersBoundBy
   134                      $filters
   135                      (OutputCols $selectInput)
   136                  )
   137          )
   138          $projections:*
   139          $passthrough:*
   140      )
   141      $on:*
   142      $private:*
   143  )
   144  =>
   145  (Project
   146      ((OpName)
   147          $left
   148          (Project
   149              $selectInput
   150              $projections
   151              (UnionCols $passthrough (OutputCols $selectInput))
   152          )
   153          (ConcatFilters $on $filters)
   154          $private
   155      )
   156      []
   157      (OutputCols2 $left $right)
   158  )
   159  
   160  # TryDecorrelateProjectInnerJoin tries to decorrelate by hoisting the filter of
   161  # an InnerJoin operator that sits below a LeftJoin/Project operator combo. The
   162  # Project operator itself can't be reordered above the LeftJoin like it can in
   163  # the InnerJoin case. However, the InnerJoin filter can be merged with the
   164  # LeftJoin filter, which is enough to decorrelate in several useful cases. This
   165  # rule works similarly to TryDecorrelateProjectSelect.
   166  [TryDecorrelateProjectInnerJoin, Normalize, HighPriority]
   167  (LeftJoinApply
   168      $left:*
   169      $right:(Project
   170          $join:(InnerJoin | InnerJoinApply
   171              $innerLeft:*
   172              $innerRight:*
   173              $innerOn:* &
   174                  ^(FiltersBoundBy
   175                      $innerOn
   176                      (OutputCols2 $innerLeft $innerRight)
   177                  )
   178              $innerPrivate:*
   179          )
   180          $projections:*
   181          $passthrough:*
   182      )
   183      $on:*
   184      $private:*
   185  )
   186  =>
   187  (Project
   188      (LeftJoinApply
   189          $left
   190          (Project
   191              ((OpName $join)
   192                  $innerLeft
   193                  $innerRight
   194                  []
   195                  $innerPrivate
   196              )
   197              $projections
   198              (UnionCols $passthrough (OutputCols $join))
   199          )
   200          (ConcatFilters $on $innerOn)
   201          $private
   202      )
   203      []
   204      (OutputCols2 $left $right)
   205  )
   206  
   207  # TryDecorrelateInnerJoin tries to decorrelate an InnerJoin operator nested
   208  # beneath another Join operator by pulling up its join condition to the outer
   209  # join. This may be enough to decorrelate the outer join, or it may allow any
   210  # outer column references to continue to journey upwards.
   211  #
   212  # TODO(andyk): Consider adding case for outer cols in $left.
   213  [TryDecorrelateInnerJoin, Normalize]
   214  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
   215          | SemiJoinApply | AntiJoin | AntiJoinApply
   216      $left:*
   217      $right:* &
   218          (HasOuterCols $right) &
   219          (InnerJoin | InnerJoinApply
   220              $innerLeft:*
   221              $innerRight:*
   222              $innerOn:* &
   223                  ^(FiltersBoundBy
   224                      $innerOn
   225                      (OutputCols2 $innerLeft $innerRight)
   226                  )
   227              $innerPrivate:*
   228          )
   229      $on:*
   230      $private:*
   231  )
   232  =>
   233  ((OpName)
   234      $left
   235      ((OpName $right) $innerLeft $innerRight [] $innerPrivate)
   236      (ConcatFilters $on $innerOn)
   237      $private
   238  )
   239  
   240  # TryDecorrelateInnerLeftJoin tries to decorrelate a LeftJoin operator nested
   241  # beneath an InnerJoin operator by using the associative identity to pull up the
   242  # left join to become the outer join. This may be enough to decorrelate the
   243  # outer join, or it may allow any outer column references to continue to journey
   244  # upwards.
   245  #
   246  # Citations: [1] (see identity #6)
   247  [TryDecorrelateInnerLeftJoin, Normalize]
   248  (InnerJoin | InnerJoinApply
   249      $left:*
   250      $right:* &
   251          (HasOuterCols $right) &
   252          (LeftJoin
   253              $innerLeft:*
   254              $innerRight:*
   255              $innerOn:*
   256              $innerPrivate:*
   257          )
   258      $on:* & (FiltersBoundBy $on (OutputCols2 $left $innerLeft))
   259      $private:*
   260  )
   261  =>
   262  (LeftJoinApply
   263      ((OpName) $left $innerLeft $on $innerPrivate)
   264      $innerRight
   265      $innerOn
   266      $private
   267  )
   268  
   269  # TryDecorrelateGroupBy "pushes down" a Join into a GroupBy operator, in an
   270  # attempt to keep "digging" down to find and eliminate unnecessary correlation.
   271  # The eventual hope is to trigger the DecorrelateJoin rule to turn a JoinApply
   272  # operator into a non-apply Join operator.
   273  #
   274  # Example:
   275  #
   276  #   SELECT left.x, left.y, input.*
   277  #   FROM left
   278  #   INNER JOIN LATERAL
   279  #   (
   280  #     SELECT COUNT(*) FROM input WHERE input.x = left.x GROUP BY c
   281  #   ) AS input
   282  #   ON left.y = 10
   283  #   =>
   284  #   SELECT CONST_AGG(left.x), CONST_AGG(left.y), COUNT(*)
   285  #   FROM left WITH ORDINALITY
   286  #   INNER JOIN LATERAL
   287  #   (
   288  #     SELECT * FROM input WHERE input.x = left.x
   289  #   ) AS input
   290  #   ON True
   291  #   GROUP BY input.c, left.ordinality
   292  #   HAVING left.y = 10
   293  #
   294  # In other cases, we can use an existing non-null column as a canary; that
   295  # column would not be constant necessarily, hence the use of ANY_NOT_NULL
   296  # instead of CONST_AGG.
   297  #
   298  # An ordinality column only needs to be synthesized if "left" does not already
   299  # have a strict key. We wrap the output in a Project operator to ensure that
   300  # the original output columns are preserved and the ordinality column is not
   301  # inadvertently added as a new output column.
   302  #
   303  # CONST_AGG is an internal aggregation function used when all rows in the
   304  # grouping set have the same value on the column.
   305  #
   306  # Citations: [3] (see identity #8)
   307  [TryDecorrelateGroupBy, Normalize]
   308  (InnerJoin | InnerJoinApply
   309      $left:*
   310      $right:* &
   311          (HasOuterCols $right) &
   312          (GroupBy | DistinctOn
   313              $input:*
   314              $aggregations:*
   315              $groupingPrivate:*
   316          ) &
   317          (IsUnorderedGrouping $groupingPrivate)
   318      $on:*
   319      $private:*
   320  )
   321  =>
   322  (Project
   323      # Needed to project away any columns added by EnsureKey.
   324      (Select
   325          ((OpName $right)
   326              (InnerJoinApply
   327                  $newLeft:(EnsureKey $left)
   328                  $input
   329                  []
   330                  $private
   331              )
   332              (AppendAggCols
   333                  $aggregations
   334                  ConstAgg
   335                  (NonKeyCols $newLeft)
   336              )
   337              (AddColsToGrouping
   338                  $groupingPrivate
   339                  (KeyCols $newLeft)
   340              )
   341          )
   342          $on
   343      )
   344      []
   345      (OutputCols2 $left $right)
   346  )
   347  
   348  # TryDecorrelateScalarGroupBy "pushes down" a Join into a ScalarGroupBy
   349  # operator, in an attempt to keep "digging" down to find and eliminate
   350  # unnecessary correlation. The eventual hope is to trigger the DecorrelateJoin
   351  # rule to turn a JoinApply operator into a non-apply Join operator. This rule
   352  # has several requirements:
   353  #
   354  #   1. The left input must have a strict key. If not already present, a key can
   355  #      be synthesized by using the RowNumber operator to uniquely number the
   356  #      rows.
   357  #   2. All aggregate functions must ignore null values, so that they will
   358  #      ignore the null values generated by the left join. We can remap the ones
   359  #      that do not ignore null values:
   360  #       - CountRows is mapped into a Count aggregate that operates over a
   361  #         not-null column from the right input (one is synthesized if
   362  #         necessary).
   363  #       - ConstAgg is mapped into the less restrictive ConstNotNullAgg.
   364  #       - Any other operator which doesn't ignore NULLs can be replaced with a
   365  #         projection taking into account a non-null column (one is synthesized
   366  #         if necessary) to distinguish NULLs which were present in the right
   367  #         input from those that arose from the left join:
   368  #
   369  #         CASE
   370  #           WHEN notnull IS NOT NULL THEN aggregated_value
   371  #           ELSE NULL
   372  #         END
   373  #
   374  #         This works because for every group there is just one left row
   375  #         (because we group by its key), and if there are right rows we can
   376  #         take the aggregation verbatim, but if there were no matches on the
   377  #         right we need to return the appropriate "0 rows" value for that
   378  #         aggregate (which for now is assumed to be NULL).
   379  #
   380  # Example:
   381  #
   382  #   SELECT left.x, left.y, input.*
   383  #   FROM left
   384  #   INNER JOIN LATERAL
   385  #   (
   386  #     SELECT COUNT(*), SUM(c) FROM input WHERE input.x = left.x
   387  #   ) AS input
   388  #   ON left.y = 10
   389  #   =>
   390  #   SELECT CONST_AGG(left.x), CONST_AGG(left.y), COUNT(input.t), SUM(input.c)
   391  #   FROM left WITH ORDINALITY
   392  #   LEFT JOIN LATERAL
   393  #   (
   394  #     SELECT c, True t FROM input WHERE input.x = left.x
   395  #   ) AS input
   396  #   ON True
   397  #   GROUP BY left.ordinality
   398  #   HAVING left.y = 10
   399  #
   400  # Non-null ignoring example:
   401  #
   402  #   SELECT left.x, input.*
   403  #   FROM left
   404  #   INNER JOIN LATERAL
   405  #   (
   406  #     SELECT ARRAY_AGG(c) FORM INPUT WHERE input.x = left.x
   407  #   ) AS input
   408  #   ON left.y = 10
   409  #   =>
   410  #   SELECT
   411  #   CONST_AGG(left.x),
   412  #   CASE
   413  #     WHEN ANY_NOT_NULL(notnull) IS NOT NULL THEN aggregated_value
   414  #     ELSE NULL
   415  #   END
   416  #   FROM left WITH ORDINALITY
   417  #   LEFT JOIN LATERAL
   418  #   (
   419  #     SELECT c, True notnull FROM input WHERE input.x = left.x
   420  #   ) AS input
   421  #   GROUP BY left.ordinality
   422  #   HAVING left.y = 10
   423  #
   424  # In this example, the "notnull" canary is needed to determine if the value of
   425  # the ARRAY_AGG aggregation should be NULL or {NULL}.
   426  #
   427  # An ordinality column only needs to be synthesized if "left" does not already
   428  # have a key. The "true" column only needs to be added if "input" does not
   429  # already have a not-null column (and COUNT(*) is used).
   430  #
   431  # CONST_AGG is an internal aggregation function used when all rows in the
   432  # grouping set have the same value on the column.
   433  #
   434  # Citations: [3] (see identity #9)
   435  [TryDecorrelateScalarGroupBy, Normalize]
   436  (InnerJoin | InnerJoinApply
   437      $left:*
   438      $right:* &
   439          (HasOuterCols $right) &
   440          (ScalarGroupBy
   441              $input:*
   442              $aggregations:*
   443              $groupingPrivate:*
   444          ) &
   445          (AggsCanBeDecorrelated $aggregations)
   446      $on:*
   447      $private:*
   448  )
   449  =>
   450  (Select
   451      (Project
   452          # Needed to project away any columns added by EnsureKey.
   453          # TranslateNonIgnoreAggs is where the actual discriminating CASE
   454          # expressions are introduced.
   455          (TranslateNonIgnoreAggs
   456              (GroupBy
   457                  (LeftJoinApply
   458                      $leftWithKey:(EnsureKey $left)
   459                      # canaryCol might be 0 if no canary is necessary, in which case
   460                      # this function does nothing.
   461                      $rightWithCanary:(EnsureCanary
   462                          $input
   463                          $canaryCol:(EnsureCanaryCol
   464                              $input
   465                              $aggregations
   466                          )
   467                      )
   468                      []
   469                      $private
   470                  )
   471                  (AppendAggCols2
   472                      $translatedAggs:(EnsureAggsCanIgnoreNulls
   473                          $rightWithCanary
   474                          $aggregations
   475                      )
   476                      ConstAgg
   477                      (NonKeyCols $leftWithKey)
   478                      AnyNotNullAgg
   479                      (CanaryColSet $canaryCol)
   480                  )
   481                  (MakeGrouping
   482                      (KeyCols $leftWithKey)
   483                      (ExtractGroupingOrdering $groupingPrivate)
   484                  )
   485              )
   486              $translatedAggs
   487              $rightWithCanary
   488              $aggregations
   489              $canaryCol
   490          )
   491          []
   492          (OutputCols2 $left $right)
   493      )
   494      $on
   495  )
   496  
   497  # TryDecorrelateSemiJoin maps a SemiJoin to an equivalent GroupBy/InnerJoin
   498  # complex in hopes of triggering further rules that will ultimately decorrelate
   499  # the query. Once this rule fires, a corresponding InnerJoin decorrelation rule
   500  # will match (i.e. TryDecorrelateGroupBy or TryDecorrelateProject).
   501  #
   502  # Citations: [5]
   503  [TryDecorrelateSemiJoin, Normalize]
   504  (SemiJoin | SemiJoinApply
   505      $left:*
   506      $right:* &
   507          (HasOuterCols $right) &
   508          (CanHaveZeroRows $right) &
   509  
   510          # Let EliminateExistsGroupBy match instead.
   511          (GroupBy | DistinctOn | Project | ProjectSet)
   512      $on:*
   513      $private:*
   514  )
   515  =>
   516  (Project
   517      # Needed to project away any columns added by EnsureKey.
   518      (GroupBy
   519          (InnerJoinApply
   520              $newLeft:(EnsureKey $left)
   521              $right
   522              $on
   523              $private
   524          )
   525          (MakeAggCols ConstAgg (NonKeyCols $newLeft))
   526          (MakeGrouping (KeyCols $newLeft) (EmptyOrdering))
   527      )
   528      []
   529      (OutputCols2 $left $right)
   530  )
   531  
   532  # TryDecorrelateLimitOne "pushes down" a Join into a Limit 1 operator, in an
   533  # attempt to keep "digging" down to find and eliminate unnecessary correlation.
   534  # The eventual hope is to trigger the DecorrelateJoin rule to turn a JoinApply
   535  # operator into a non-apply Join operator.
   536  #
   537  # Like the TryDecorrelateGroupBy and TryDecorrelateScalarGroupBy rules, this
   538  # rule rewrites the expression to perform the join first, followed by a grouping
   539  # that eliminates any extra rows introduced by the join. The DistinctOn operator
   540  # uses First aggregates to select values from the first row in each group. Non-
   541  # key columns from the left join input become Const aggregates, since they are
   542  # functionally dependent on the grouped key columns (and are therefore constant
   543  # in each group).
   544  #
   545  # TODO(andyk): Add other join types.
   546  [TryDecorrelateLimitOne, Normalize]
   547  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
   548      $left:*
   549      $right:* &
   550          (HasOuterCols $right) &
   551          (Limit $input:* (Const 1) $ordering:*)
   552      $on:*
   553      $private:*
   554  )
   555  =>
   556  (Project
   557      # Needed to project away any columns added by EnsureKey.
   558      (DistinctOn
   559          ((OpName) $newLeft:(EnsureKey $left) $input $on $private)
   560          (MakeAggCols2
   561              ConstAgg
   562              (NonKeyCols $newLeft)
   563              FirstAgg
   564              (OutputCols $input)
   565          )
   566          (MakeGrouping (KeyCols $newLeft) $ordering)
   567      )
   568      []
   569      (OutputCols2 $left $right)
   570  )
   571  
   572  # TryDecorrelateProjectSet "pushes down" an InnerJoinApply operator into a
   573  # ProjectSet operator, in hopes of eliminating any correlation between the
   574  # ProjectSet operator and the InnerJoinApply operator. Eventually, the
   575  # hope is to trigger the DecorrelateJoin pattern to turn JoinApply operators
   576  # into non-apply Join operators.
   577  [TryDecorrelateProjectSet, Normalize]
   578  (InnerJoinApply
   579      $left:*
   580      (ProjectSet $input:* $zip:*)
   581      $on:*
   582      $private:*
   583  )
   584  =>
   585  (Select
   586      (ProjectSet (InnerJoinApply $left $input [] $private) $zip)
   587      $on
   588  )
   589  
   590  # TryDecorrelateWindow "pushes down" a Join into a Window operator, in an
   591  # attempt to keep "digging" down to find and eliminate unnecessary correlation.
   592  # The eventual hope is to trigger the DecorrelateJoin rule to turn a JoinApply
   593  # operator into a non-apply Join operator. This rule is very similar to
   594  # TryDecorrelateGroupBy.
   595  #
   596  # This rule adds the output columns of the left side of the join to the Window
   597  # operator's partition cols. This effectively means that each row of the left
   598  # side of the join is windowed independently, assuming the left side has a key
   599  # (and if it doesn't, we can give it one via EnsureKey).
   600  #
   601  # SELECT
   602  #     left.k, left.x, right.x, rank
   603  # FROM
   604  #   left
   605  #   INNER JOIN LATERAL (
   606  #     SELECT rank() OVER () AS rank, right.x FROM (SELECT * FROM right WHERE left.k = right.k)
   607  #   )
   608  # =>
   609  # SELECT
   610  #   left.k, left.x, right.x, rank() OVER (PARTITION BY left.k) AS rank
   611  # FROM
   612  #   left INNER JOIN right ON left.k = right.k
   613  #
   614  # Sketch of why this rule works (assume A has a key):
   615  #
   616  # Recall from [3] that the definition of Apply (for cross joins) is:
   617  #
   618  #  (InnerJoinApply A E true) = (Union_{r ∈ A} {r} × E(r))
   619  #
   620  # Where E is a relational expression mapping rows r ∈ A to relational result
   621  # sets.
   622  #
   623  # Starting with (InnerJoinApply A (Window B partcols) on), where P is the set of
   624  # partition columns and p is the join predicate.
   625  #
   626  #  = (Select (InnerJoinApply A (Window B partcols) true) on)
   627  #
   628  # By the inverse of MergeSelectInnerJoin.
   629  #
   630  #  = (Select
   631  #      (Union_{r ∈ A} {r} × (Window B partcols)(r))
   632  #      on
   633  #    )
   634  #
   635  # By the definition of Apply.
   636  #
   637  #  = (Select
   638  #      (Union_{r ∈ A} {r} × (Window B(r) partcols))
   639  #      on
   640  #    )
   641  #
   642  # By the fact that by construction, window functions only refer to
   643  # variable references in their input.
   644  #
   645  #  = (Select
   646  #      (Union_{r ∈ A} (Window {r} × B(r) partcols))
   647  #      on
   648  #    )
   649  #
   650  # Because the Window only looks at columns from B(r).
   651  #
   652  #  = (Select
   653  #      (Window
   654  #        (Union_{r ∈ A} {r} × B(r))
   655  #        (Union partcols (KeyCols A))
   656  #      )
   657  #      on
   658  #    )
   659  #
   660  # Roughly, since A has a key, partitioning (Union_{r ∈ A} r × B(r)) by the key
   661  # of A results in exactly one partition for each row in A, and so partitioning
   662  # higher up has the same effect as performing the window function for each row.
   663  #
   664  #  = (Select
   665  #      (Window
   666  #        (InnerJoinApply A B true)
   667  #        (Union partcols (OutputCols A))
   668  #      )
   669  #      on
   670  #    )
   671  #
   672  # Again by the definition of Apply.
   673  [TryDecorrelateWindow, Normalize]
   674  (InnerJoinApply | InnerJoin
   675      $left:*
   676      $right:(Window $input:* $windows:* $private:*) &
   677          (HasOuterCols $right)
   678      $on:*
   679      $joinPrivate:*
   680  )
   681  =>
   682  (Project
   683      # Needed to project away any columns added by EnsureKey.
   684      (Select
   685          (Window
   686              ((OpName)
   687                  $newLeft:(EnsureKey $left)
   688                  $input
   689                  []
   690                  $joinPrivate
   691              )
   692              $windows
   693              (AddColsToPartition $private (KeyCols $newLeft))
   694          )
   695          $on
   696      )
   697      []
   698      (OutputCols2 $left $right)
   699  )
   700  
   701  # TryDecorrelateMax1Row "pushes down" a Join into a Max1Row operator, in an
   702  # attempt to keep "digging" down to find and eliminate unnecessary correlation.
   703  # The eventual hope is to trigger the DecorrelateJoin rule to turn a JoinApply
   704  # operator into a non-apply Join operator.
   705  #
   706  # The Max1Row operator is mapped into an EnsureDistinctOn operator that wraps
   707  # the join and raises an error if it detects duplicates in the column(s) that
   708  # made up the key of the join's left input. A duplicate value in those key
   709  # column(s) indicates that more than one row from the right input matched that
   710  # value. Or in other words, it indicates that the Max1Row's subquery input would
   711  # have returned more than one row corresponding to that value. Therefore, the
   712  # two formulations are equivalent.
   713  #
   714  # TryDecorrelateMax1Row only matches when the join's "on" condition is true.
   715  # This is because pushing a non-true filter through the EnsureDistinctOn would
   716  # result in different error behavior. Since there are currently no situations
   717  # where the join's "on" condition is anything other than true, and since these
   718  # cases therefore cannot be tested, TryDecorrelateMax1Row only matches when the
   719  # "on" condition is true. If this changes, TryDecorrelateMax1Row should hoist
   720  # the non-true "on" conditions above the EnsureDistinctOn operator.
   721  [TryDecorrelateMax1Row, Normalize]
   722  (InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
   723      $left:*
   724      $right:* &
   725          (HasOuterCols $right) &
   726          (Max1Row $input:* $errorText:*)
   727      []
   728      $private:*
   729  )
   730  =>
   731  (Project
   732      (EnsureDistinctOn
   733          ((OpName) $newLeft:(EnsureKey $left) $input [] $private)
   734          (MakeAggCols
   735              ConstAgg
   736              (UnionCols (NonKeyCols $newLeft) (OutputCols $input))
   737          )
   738          (MakeErrorOnDupGrouping
   739              (KeyCols $newLeft)
   740              (EmptyOrdering)
   741              $errorText
   742          )
   743      )
   744      []
   745      (OutputCols2 $left $right)
   746  )
   747  
   748  # HoistSelectExists extracts existential subqueries from Select filters,
   749  # turning them into semi-joins. This eliminates the subquery, which is often
   750  # expensive to execute and restricts the optimizer's plan choices.
   751  #
   752  # This rule is marked as low priority so that it runs after other rules like
   753  # filter pushdown. Hoisting a correlated subquery is an expensive operation that
   754  # can't be undone, so do it only once all other work is complete. For example,
   755  # filter pushdown rules might be able to move the subquery nearer to the input
   756  # to which it's correlated before it's hoisted, making it easier to decorrelate.
   757  [HoistSelectExists, Normalize, LowPriority]
   758  (Select
   759      $input:*
   760      $filters:[
   761          ...
   762          $item:* &
   763              (HasHoistableSubquery $item) &
   764              (FiltersItem (Exists $subquery:*))
   765          ...
   766      ]
   767  )
   768  =>
   769  (Select
   770      (SemiJoinApply $input $subquery [] (EmptyJoinPrivate))
   771      (RemoveFiltersItem $filters $item)
   772  )
   773  
   774  # HoistSelectNotExists extracts non-existential subqueries from Select filters,
   775  # turning them into anti-joins. This eliminates the subquery, which is often
   776  # expensive to execute and restricts the optimizer's plan choices.
   777  #
   778  # This rule is marked as low priority for the same reason as HoistSelectExists.
   779  [HoistSelectNotExists, Normalize, LowPriority]
   780  (Select
   781      $input:*
   782      $filters:[
   783          ...
   784          $item:* &
   785              (HasHoistableSubquery $item) &
   786              (FiltersItem (Not (Exists $subquery:*)))
   787          ...
   788      ]
   789  )
   790  =>
   791  (Select
   792      (AntiJoinApply $input $subquery [] (EmptyJoinPrivate))
   793      (RemoveFiltersItem $filters $item)
   794  )
   795  
   796  # HoistSelectSubquery extracts subqueries from a Select filter and joins them
   797  # with the Select input. This and other subquery hoisting patterns create a
   798  # single, top-level relational query with no nesting.
   799  #
   800  # NOTE: Keep this ordered after the HoistSelectExists and HoistSelectNotExists
   801  #       rules. This rule will hoist any existential subqueries using
   802  #       LeftJoinApply, which is equivalent to, but not as efficient as, using
   803  #       SemiJoinApply and AntiJoinApply.
   804  #
   805  # This rule is marked as low priority for the same reason as HoistSelectExists.
   806  #
   807  # Citations: [4]
   808  [HoistSelectSubquery, Normalize, LowPriority]
   809  (Select
   810      $input:*
   811      $filters:[ ... $item:* & (HasHoistableSubquery $item) ... ]
   812  )
   813  =>
   814  (HoistSelectSubquery $input $filters)
   815  
   816  # HoistProjectSubquery extracts subqueries from a projections list and joins
   817  # them with the Project input. This and other subquery hoisting patterns create
   818  # a single, top-level relational query with no nesting.
   819  #
   820  # This rule is marked as low priority for the same reason as HoistSelectExists.
   821  [HoistProjectSubquery, Normalize, LowPriority]
   822  (Project
   823      $input:*
   824      $projections:[
   825          ...
   826          $item:* & (HasHoistableSubquery $item)
   827          ...
   828      ]
   829      $passthrough:*
   830  )
   831  =>
   832  (HoistProjectSubquery $input $projections $passthrough)
   833  
   834  # HoistJoinSubquery extracts subqueries from a join filter and joins them with
   835  # the join's right input. This and other subquery hoisting patterns create a
   836  # single, top-level relational query with no nesting. This rule only applies to
   837  # join types which have a legal apply variant.
   838  #
   839  # This rule is marked as low priority for the same reason as HoistSelectExists.
   840  [HoistJoinSubquery, Normalize, LowPriority]
   841  (InnerJoin | LeftJoin | SemiJoin | AntiJoin
   842      $left:*
   843      $right:*
   844      $on:[ ... $item:* & (HasHoistableSubquery $item) ... ]
   845      $private:*
   846  )
   847  =>
   848  (HoistJoinSubquery (OpName) $left $right $on $private)
   849  
   850  # HoistValuesSubquery extracts subqueries from row tuples and joins them with
   851  # the Values operator. This and other subquery hoisting patterns create a
   852  # single, top-level relational query with no nesting.
   853  #
   854  # This rule is marked as low priority for the same reason as HoistSelectExists.
   855  [HoistValuesSubquery, Normalize, LowPriority]
   856  (Values
   857      $rows:[ ... $item:* & (HasHoistableSubquery $item) ... ]
   858      $private:*
   859  )
   860  =>
   861  (HoistValuesSubquery $rows $private)
   862  
   863  # HoistProjectSetSubquery extracts subqueries from zipped functions and joins
   864  # them with the ProjectSet operator's input. This and other subquery hoisting
   865  # patterns create a single, top-level relational query with no nesting.
   866  #
   867  # This rule is marked as low priority for the same reason as HoistSelectExists.
   868  [HoistProjectSetSubquery, Normalize, LowPriority]
   869  (ProjectSet
   870      $input:*
   871      $zip:[ ... $item:* & (HasHoistableSubquery $item) ... ]
   872  )
   873  =>
   874  (HoistProjectSetSubquery $input $zip)
   875  
   876  # NormalizeSelectAnyFilter rewrites an Any expression that is a top-level
   877  # conjunct in Select filters, turning it into an Exists expression. Any can be
   878  # rewritten as Exists in this context because a NULL return value is treated as
   879  # False by the filter.
   880  #
   881  # Exists is more efficient than Any, since its null handling is much simpler. In
   882  # addition, the Exists can be transformed into a semi-join.
   883  #
   884  # Citations: [5] (section 3.5)
   885  [NormalizeSelectAnyFilter, Normalize]
   886  (Select
   887      $input:*
   888      $filters:[
   889          ...
   890          $item:(FiltersItem
   891              (Any $anyInput:* $scalar:* $anyPrivate:*)
   892          )
   893          ...
   894      ]
   895  )
   896  =>
   897  (Select
   898      $input
   899      (ReplaceFiltersItem
   900          $filters
   901          $item
   902          (Exists
   903              (Select
   904                  $anyInput
   905                  [
   906                      (FiltersItem
   907                          (ConstructAnyCondition
   908                              $anyInput
   909                              $scalar
   910                              $anyPrivate
   911                          )
   912                      )
   913                  ]
   914              )
   915              $anyPrivate
   916          )
   917      )
   918  )
   919  
   920  # NormalizeJoinAnyFilter is similar to NormalizeSelectAnyFilter, except that it
   921  # operates on Any expressions within Join filters rather than Select filters.
   922  [NormalizeJoinAnyFilter, Normalize]
   923  (Join
   924      $left:*
   925      $right:*
   926      $on:[
   927          ...
   928          $item:(FiltersItem
   929              (Any $anyInput:* $scalar:* $anyPrivate:*)
   930          )
   931          ...
   932      ]
   933      $private:*
   934  )
   935  =>
   936  ((OpName)
   937      $left
   938      $right
   939      (ReplaceFiltersItem
   940          $on
   941          $item
   942          (Exists
   943              (Select
   944                  $anyInput
   945                  [
   946                      (FiltersItem
   947                          (ConstructAnyCondition
   948                              $anyInput
   949                              $scalar
   950                              $anyPrivate
   951                          )
   952                      )
   953                  ]
   954              )
   955              $anyPrivate
   956          )
   957      )
   958      $private
   959  )
   960  
   961  # NormalizeSelectNotAnyFilter rewrites a Not Any expression that is a top-level
   962  # conjunct in Select filters, turning it into a Not Exists expression. Not Any
   963  # can be rewritten as Not Exists in this context because a NULL return value is
   964  # treated as False by the filter.
   965  #
   966  # Not Exists is more efficient than Not Any, since its null handling is much
   967  # simpler. In addition, the Not Exists can be transformed into an anti-join.
   968  #
   969  # Citations: [5] (section 3.5)
   970  [NormalizeSelectNotAnyFilter, Normalize]
   971  (Select
   972      $input:*
   973      $filters:[
   974          ...
   975          $item:(FiltersItem
   976              (Not (Any $anyInput:* $scalar:* $anyPrivate:*))
   977          )
   978          ...
   979      ]
   980  )
   981  =>
   982  (Select
   983      $input
   984      (ReplaceFiltersItem
   985          $filters
   986          $item
   987          (Not
   988              (Exists
   989                  (Select
   990                      $anyInput
   991                      [
   992                          (FiltersItem
   993                              (IsNot
   994                                  (ConstructAnyCondition
   995                                      $anyInput
   996                                      $scalar
   997                                      $anyPrivate
   998                                  )
   999                                  (False)
  1000                              )
  1001                          )
  1002                      ]
  1003                  )
  1004                  $anyPrivate
  1005              )
  1006          )
  1007      )
  1008  )
  1009  
  1010  # NormalizeJoinNotAnyFilter is similar to NormalizeSelectNotAnyFilter, except
  1011  # that it operates on Not Any expressions within Join filters rather than Select
  1012  # filters.
  1013  [NormalizeJoinNotAnyFilter, Normalize]
  1014  (Join
  1015      $left:*
  1016      $right:*
  1017      $on:[
  1018          ...
  1019          $item:(FiltersItem
  1020              (Not (Any $anyInput:* $scalar:* $anyPrivate:*))
  1021          )
  1022          ...
  1023      ]
  1024      $private:*
  1025  )
  1026  =>
  1027  ((OpName)
  1028      $left
  1029      $right
  1030      (ReplaceFiltersItem
  1031          $on
  1032          $item
  1033          (Not
  1034              (Exists
  1035                  (Select
  1036                      $anyInput
  1037                      [
  1038                          (FiltersItem
  1039                              (IsNot
  1040                                  (ConstructAnyCondition
  1041                                      $anyInput
  1042                                      $scalar
  1043                                      $anyPrivate
  1044                                  )
  1045                                  (False)
  1046                              )
  1047                          )
  1048                      ]
  1049                  )
  1050                  $anyPrivate
  1051              )
  1052          )
  1053      )
  1054      $private
  1055  )