github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/xform/explorer.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package xform 12 13 import ( 14 "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" 15 "github.com/cockroachdb/cockroach/pkg/sql/opt/norm" 16 "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" 17 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 18 "github.com/cockroachdb/cockroach/pkg/util" 19 ) 20 21 // explorer generates alternate expressions that are logically equivalent to 22 // existing expressions in the memo. The new expressions are added to the same 23 // memo group as the existing expression. The optimizer will cost all the 24 // expressions and pick the lowest cost alternative that provides any required 25 // physical properties. 26 // 27 // Equivalent expressions are generated by exploration rules. An exploration 28 // rule efficiently enumerates all possible combinations of its sub-expressions 29 // in order to look for matches. For example: 30 // 31 // // [AssociateJoin] 32 // (InnerJoin 33 // (InnerJoin $r:* $s:* $lowerOn:*) 34 // $t:* 35 // $upperOn:* 36 // ) 37 // => 38 // ... 39 // 40 // Say the memo group containing the upper inner-join has 3 expressions in it, 41 // and the memo group containing the lower inner-join has 4 expressions. Then 42 // the explorer will enumerate 12 possible expression combinations, looking for 43 // a combination that has an InnerJoin operator with another InnerJoin operator 44 // as its left operand. 45 // 46 // Once new expressions have been added to a group, they themselves become 47 // eligible for exploration, which might generate further expressions, and so 48 // on. Because the same group will often be explored multiple times, the 49 // explorer keeps state which helps it avoid duplicate work during subsequent 50 // passes. 51 // 52 // The explorer only traverses expression trees to the depth required by the 53 // exploration match patterns. It expects the optimizer to call exploreGroup 54 // for each group that needs to be explored. The optimizer can then use branch 55 // and bound pruning to skip exploration of entire sub-trees. 56 // 57 // For each expression combination that matches, a replace expression is 58 // constructed and added to the same memo group as the matched expression: 59 // 60 // // [AssociateJoin] 61 // (InnerJoin 62 // (InnerJoin $r:* $s:* $lowerOn:*) 63 // $t:* 64 // $upperOn:* 65 // ) 66 // => 67 // (InnerJoin 68 // (InnerJoin 69 // $r 70 // $t 71 // (ConstructFiltersNotUsing $s $lowerOn $upperOn) 72 // ) 73 // $s 74 // (ConstructFiltersUsing $s $lowerOn $upperOn) 75 // ) 76 // 77 // In this example, if the upper and lower groups each contain two InnerJoin 78 // expressions, then four new expressions will be added to the memo group of the 79 // matched expression. During the next pass, the four new expressions will 80 // themselves match this same rule. However, adding their replace expressions to 81 // the memo group will be a no-op, because they're already present. 82 type explorer struct { 83 evalCtx *tree.EvalContext 84 o *Optimizer 85 f *norm.Factory 86 mem *memo.Memo 87 88 // funcs is the struct used to call all custom match and replace functions 89 // used by the exploration rules. It wraps an unnamed xfunc.CustomFuncs, 90 // so it provides a clean interface for calling functions from both the xform 91 // and xfunc packages using the same prefix. 92 funcs CustomFuncs 93 } 94 95 // init initializes the explorer for use (or reuse). 96 func (e *explorer) init(o *Optimizer) { 97 e.evalCtx = o.evalCtx 98 e.o = o 99 e.f = o.Factory() 100 e.mem = o.mem 101 e.funcs.Init(e) 102 } 103 104 // exploreGroup generates alternate expressions that are logically equivalent 105 // to the expressions already in the given group, and adds them to the group. 106 // The explorer maintains state that tracks which expressions were explored in 107 // previous passes. It keeps "start" and "end" expressions for the group which 108 // track the expressions which need to be fully explored during the current 109 // pass. Each time exploreGroup is called, the end of the previous pass becomes 110 // the start of the next pass. For example: 111 // 112 // pass1 pass2 pass3 113 // <-start 114 // e0 e0 e0 115 // <-end <-start 116 // e1 (new) e1 e1 117 // 118 // e2 (new) e2 e2 119 // <-end <-start 120 // e3 (new) e3 121 // <-end 122 // 123 // For rules which match one or more sub-expressions in addition to the top- 124 // level expression, there is extra complexity because every combination needs 125 // to be considered. Even expressions which were explored in previous passes 126 // need to be partially re-explored, because they may match when considered in 127 // combination with a new sub-expression which wasn't present during the last 128 // pass. Only combinations which consist solely of old expressions can be 129 // skipped. 130 // 131 // Combination enumeration code is just a series of nested loops generated by 132 // Optgen. Each non-scalar match pattern or sub-pattern uses a loop to 133 // enumerate the expressions in the corresponding memo group. For example: 134 // 135 // $join:(InnerJoin 136 // $left:(InnerJoin) 137 // $right:(Select) 138 // $on:* 139 // ) 140 // 141 // This match pattern would be implemented with 3 nested loops: 1 each for the 142 // $join, $left, and $right memo groups. If $join had 2 expressions, $left had 143 // 3 expressions, and $right had 2 expressions, then 2 * 3 * 2 = 12 combos will 144 // be considered. The innermost loop can skip iteration if all outer loops are 145 // bound to expressions which have already been explored in previous passes: 146 // 147 // for e1 in memo-exprs($join): 148 // for e2 in memo-exprs($left): 149 // for e3 in memo-exprs($right): 150 // if ordinal(e3) >= state.start: 151 // ... explore (e1, e2, e3) combo ... 152 // 153 func (e *explorer) exploreGroup(grp memo.RelExpr) *exploreState { 154 // Do nothing if this group has already been fully explored. 155 state := e.ensureExploreState(grp) 156 if state.fullyExplored { 157 return state 158 } 159 160 // Update set of group members that will be considered during this pass, by 161 // setting the start member to be the end expression from last pass. 162 state.start = state.end 163 state.end = 0 164 for member := grp; member != nil; member = member.NextExpr() { 165 state.end++ 166 } 167 168 var member memo.RelExpr 169 var i int 170 fullyExplored := true 171 for i, member = 0, grp; i < state.end; i, member = i+1, member.NextExpr() { 172 // If member was fully explored in previous passes, then nothing further 173 // to do. 174 if state.isMemberFullyExplored(i) { 175 continue 176 } 177 178 if memberExplored := e.exploreGroupMember(state, member, i); memberExplored { 179 // No more rules can ever match this expression, so skip it in 180 // future passes. 181 state.markMemberAsFullyExplored(i) 182 } else { 183 // If even one member is not fully explored, then the group is not 184 // fully explored. 185 fullyExplored = false 186 } 187 } 188 189 // If new group members were added by the explorer, then the group has not 190 // yet been fully explored. 191 if fullyExplored && member == nil { 192 state.fullyExplored = true 193 } 194 return state 195 } 196 197 // lookupExploreState returns the optState struct associated with the memo 198 // group. 199 func (e *explorer) lookupExploreState(grp memo.RelExpr) *exploreState { 200 return &e.o.lookupOptState(grp, physical.MinRequired).explore 201 } 202 203 // ensureExploreState allocates the exploration state in the optState struct 204 // associated with the memo group, with respect to the min physical props. 205 func (e *explorer) ensureExploreState(grp memo.RelExpr) *exploreState { 206 return &e.o.ensureOptState(grp, physical.MinRequired).explore 207 } 208 209 // ---------------------------------------------------------------------- 210 // 211 // Exploration state 212 // 213 // ---------------------------------------------------------------------- 214 215 // exploreState contains state needed by the explorer for each memo group it 216 // explores. The state is allocated lazily for a group when the exploreGroup 217 // method is called. Various fields record what exploration has taken place so 218 // that the same work isn't repeated. 219 type exploreState struct { 220 // start (inclusive) and end (exclusive) specify which expressions need to 221 // be explored in the current pass. Expressions < start have been partly 222 // explored during previous passes. Expressions >= end are new expressions 223 // added during the current pass. 224 start int 225 end int 226 227 // fullyExplored is set to true once all members of the group have been fully 228 // explored, meaning that no new members will ever be added to the group, or 229 // to dependent child groups. Further exploration of the group can be skipped. 230 fullyExplored bool 231 232 // fullyExploredMembers is a set of ordinal positions of members within the 233 // memo group. Once a member expression has been fully explored, its ordinal 234 // is added to this set. 235 fullyExploredMembers util.FastIntSet 236 } 237 238 // isMemberFullyExplored is true if the member at the given ordinal position 239 // within the group will never match an additional rule, and can therefore be 240 // skipped in future exploration passes. 241 func (e *exploreState) isMemberFullyExplored(ordinal int) bool { 242 return e.fullyExploredMembers.Contains(ordinal) 243 } 244 245 // markMemberAsFullyExplored is called when all possible matching combinations 246 // have been considered for the subtree rooted at the given expression. Even if 247 // there are more exploration passes, this expression will never have new 248 // children, grand-children, etc. that might cause it to match another rule. 249 func (e *exploreState) markMemberAsFullyExplored(ordinal int) { 250 e.fullyExploredMembers.Add(ordinal) 251 }