github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/xform/explorer.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package xform
    12  
    13  import (
    14  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    15  	"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
    16  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    18  	"github.com/cockroachdb/cockroach/pkg/util"
    19  )
    20  
    21  // explorer generates alternate expressions that are logically equivalent to
    22  // existing expressions in the memo. The new expressions are added to the same
    23  // memo group as the existing expression. The optimizer will cost all the
    24  // expressions and pick the lowest cost alternative that provides any required
    25  // physical properties.
    26  //
    27  // Equivalent expressions are generated by exploration rules. An exploration
    28  // rule efficiently enumerates all possible combinations of its sub-expressions
    29  // in order to look for matches. For example:
    30  //
    31  //  // [AssociateJoin]
    32  //  (InnerJoin
    33  //    (InnerJoin $r:* $s:* $lowerOn:*)
    34  //    $t:*
    35  //    $upperOn:*
    36  //  )
    37  //  =>
    38  //  ...
    39  //
    40  // Say the memo group containing the upper inner-join has 3 expressions in it,
    41  // and the memo group containing the lower inner-join has 4 expressions. Then
    42  // the explorer will enumerate 12 possible expression combinations, looking for
    43  // a combination that has an InnerJoin operator with another InnerJoin operator
    44  // as its left operand.
    45  //
    46  // Once new expressions have been added to a group, they themselves become
    47  // eligible for exploration, which might generate further expressions, and so
    48  // on. Because the same group will often be explored multiple times, the
    49  // explorer keeps state which helps it avoid duplicate work during subsequent
    50  // passes.
    51  //
    52  // The explorer only traverses expression trees to the depth required by the
    53  // exploration match patterns. It expects the optimizer to call exploreGroup
    54  // for each group that needs to be explored. The optimizer can then use branch
    55  // and bound pruning to skip exploration of entire sub-trees.
    56  //
    57  // For each expression combination that matches, a replace expression is
    58  // constructed and added to the same memo group as the matched expression:
    59  //
    60  //  // [AssociateJoin]
    61  //  (InnerJoin
    62  //    (InnerJoin $r:* $s:* $lowerOn:*)
    63  //    $t:*
    64  //    $upperOn:*
    65  //  )
    66  //  =>
    67  //  (InnerJoin
    68  //    (InnerJoin
    69  //      $r
    70  //      $t
    71  //      (ConstructFiltersNotUsing $s $lowerOn $upperOn)
    72  //    )
    73  //    $s
    74  //    (ConstructFiltersUsing $s $lowerOn $upperOn)
    75  //  )
    76  //
    77  // In this example, if the upper and lower groups each contain two InnerJoin
    78  // expressions, then four new expressions will be added to the memo group of the
    79  // matched expression. During the next pass, the four new expressions will
    80  // themselves match this same rule. However, adding their replace expressions to
    81  // the memo group will be a no-op, because they're already present.
    82  type explorer struct {
    83  	evalCtx *tree.EvalContext
    84  	o       *Optimizer
    85  	f       *norm.Factory
    86  	mem     *memo.Memo
    87  
    88  	// funcs is the struct used to call all custom match and replace functions
    89  	// used by the exploration rules. It wraps an unnamed xfunc.CustomFuncs,
    90  	// so it provides a clean interface for calling functions from both the xform
    91  	// and xfunc packages using the same prefix.
    92  	funcs CustomFuncs
    93  }
    94  
    95  // init initializes the explorer for use (or reuse).
    96  func (e *explorer) init(o *Optimizer) {
    97  	e.evalCtx = o.evalCtx
    98  	e.o = o
    99  	e.f = o.Factory()
   100  	e.mem = o.mem
   101  	e.funcs.Init(e)
   102  }
   103  
   104  // exploreGroup generates alternate expressions that are logically equivalent
   105  // to the expressions already in the given group, and adds them to the group.
   106  // The explorer maintains state that tracks which expressions were explored in
   107  // previous passes. It keeps "start" and "end" expressions for the group which
   108  // track the expressions which need to be fully explored during the current
   109  // pass. Each time exploreGroup is called, the end of the previous pass becomes
   110  // the start of the next pass. For example:
   111  //
   112  //   pass1         pass2         pass3
   113  //      <-start
   114  //   e0            e0            e0
   115  //      <-end         <-start
   116  //   e1 (new)      e1            e1
   117  //
   118  //   e2 (new)      e2            e2
   119  //                    <-end         <-start
   120  //                 e3 (new)      e3
   121  //                                  <-end
   122  //
   123  // For rules which match one or more sub-expressions in addition to the top-
   124  // level expression, there is extra complexity because every combination needs
   125  // to be considered. Even expressions which were explored in previous passes
   126  // need to be partially re-explored, because they may match when considered in
   127  // combination with a new sub-expression which wasn't present during the last
   128  // pass. Only combinations which consist solely of old expressions can be
   129  // skipped.
   130  //
   131  // Combination enumeration code is just a series of nested loops generated by
   132  // Optgen. Each non-scalar match pattern or sub-pattern uses a loop to
   133  // enumerate the expressions in the corresponding memo group. For example:
   134  //
   135  //   $join:(InnerJoin
   136  //     $left:(InnerJoin)
   137  //     $right:(Select)
   138  //     $on:*
   139  //   )
   140  //
   141  // This match pattern would be implemented with 3 nested loops: 1 each for the
   142  // $join, $left, and $right memo groups. If $join had 2 expressions, $left had
   143  // 3 expressions, and $right had 2 expressions, then 2 * 3 * 2 = 12 combos will
   144  // be considered. The innermost loop can skip iteration if all outer loops are
   145  // bound to expressions which have already been explored in previous passes:
   146  //
   147  //   for e1 in memo-exprs($join):
   148  //     for e2 in memo-exprs($left):
   149  //       for e3 in memo-exprs($right):
   150  //         if ordinal(e3) >= state.start:
   151  //           ... explore (e1, e2, e3) combo ...
   152  //
   153  func (e *explorer) exploreGroup(grp memo.RelExpr) *exploreState {
   154  	// Do nothing if this group has already been fully explored.
   155  	state := e.ensureExploreState(grp)
   156  	if state.fullyExplored {
   157  		return state
   158  	}
   159  
   160  	// Update set of group members that will be considered during this pass, by
   161  	// setting the start member to be the end expression from last pass.
   162  	state.start = state.end
   163  	state.end = 0
   164  	for member := grp; member != nil; member = member.NextExpr() {
   165  		state.end++
   166  	}
   167  
   168  	var member memo.RelExpr
   169  	var i int
   170  	fullyExplored := true
   171  	for i, member = 0, grp; i < state.end; i, member = i+1, member.NextExpr() {
   172  		// If member was fully explored in previous passes, then nothing further
   173  		// to do.
   174  		if state.isMemberFullyExplored(i) {
   175  			continue
   176  		}
   177  
   178  		if memberExplored := e.exploreGroupMember(state, member, i); memberExplored {
   179  			// No more rules can ever match this expression, so skip it in
   180  			// future passes.
   181  			state.markMemberAsFullyExplored(i)
   182  		} else {
   183  			// If even one member is not fully explored, then the group is not
   184  			// fully explored.
   185  			fullyExplored = false
   186  		}
   187  	}
   188  
   189  	// If new group members were added by the explorer, then the group has not
   190  	// yet been fully explored.
   191  	if fullyExplored && member == nil {
   192  		state.fullyExplored = true
   193  	}
   194  	return state
   195  }
   196  
   197  // lookupExploreState returns the optState struct associated with the memo
   198  // group.
   199  func (e *explorer) lookupExploreState(grp memo.RelExpr) *exploreState {
   200  	return &e.o.lookupOptState(grp, physical.MinRequired).explore
   201  }
   202  
   203  // ensureExploreState allocates the exploration state in the optState struct
   204  // associated with the memo group, with respect to the min physical props.
   205  func (e *explorer) ensureExploreState(grp memo.RelExpr) *exploreState {
   206  	return &e.o.ensureOptState(grp, physical.MinRequired).explore
   207  }
   208  
   209  // ----------------------------------------------------------------------
   210  //
   211  // Exploration state
   212  //
   213  // ----------------------------------------------------------------------
   214  
   215  // exploreState contains state needed by the explorer for each memo group it
   216  // explores. The state is allocated lazily for a group when the exploreGroup
   217  // method is called. Various fields record what exploration has taken place so
   218  // that the same work isn't repeated.
   219  type exploreState struct {
   220  	// start (inclusive) and end (exclusive) specify which expressions need to
   221  	// be explored in the current pass. Expressions < start have been partly
   222  	// explored during previous passes. Expressions >= end are new expressions
   223  	// added during the current pass.
   224  	start int
   225  	end   int
   226  
   227  	// fullyExplored is set to true once all members of the group have been fully
   228  	// explored, meaning that no new members will ever be added to the group, or
   229  	// to dependent child groups. Further exploration of the group can be skipped.
   230  	fullyExplored bool
   231  
   232  	// fullyExploredMembers is a set of ordinal positions of members within the
   233  	// memo group. Once a member expression has been fully explored, its ordinal
   234  	// is added to this set.
   235  	fullyExploredMembers util.FastIntSet
   236  }
   237  
   238  // isMemberFullyExplored is true if the member at the given ordinal position
   239  // within the group will never match an additional rule, and can therefore be
   240  // skipped in future exploration passes.
   241  func (e *exploreState) isMemberFullyExplored(ordinal int) bool {
   242  	return e.fullyExploredMembers.Contains(ordinal)
   243  }
   244  
   245  // markMemberAsFullyExplored is called when all possible matching combinations
   246  // have been considered for the subtree rooted at the given expression. Even if
   247  // there are more exploration passes, this expression will never have new
   248  // children, grand-children, etc. that might cause it to match another rule.
   249  func (e *exploreState) markMemberAsFullyExplored(ordinal int) {
   250  	e.fullyExploredMembers.Add(ordinal)
   251  }