github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/resolve_subqueries.go (about) 1 // Copyright 2020-2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package analyzer 16 17 import ( 18 "github.com/dolthub/go-mysql-server/sql" 19 "github.com/dolthub/go-mysql-server/sql/analyzer/analyzererrors" 20 "github.com/dolthub/go-mysql-server/sql/plan" 21 "github.com/dolthub/go-mysql-server/sql/transform" 22 ) 23 24 // resolveSubqueries runs analysis on each subquery expression and subquery alias in the specified node tree. 25 // Subqueries are processed from the top down and a new scope level is created for each subquery when it is sent 26 // to be analyzed. 27 func resolveSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 28 span, ctx := ctx.Span("resolve_subqueries") 29 defer span.End() 30 31 return resolveSubqueriesHelper(ctx, a, n, scope, sel, false) 32 } 33 34 func addLeftTablesToScope(outerScope *plan.Scope, leftNode sql.Node) *plan.Scope { 35 resTbls := getTablesByName(leftNode) 36 subScope := outerScope 37 for _, tbl := range resTbls { 38 subScope = subScope.NewScopeInJoin(tbl) 39 } 40 subScope.SetJoin(true) 41 return subScope 42 } 43 44 // finalizeSubqueryLateral ensures that all SubqueryAliases with IsLateral set to true have their children also set to true. 45 func finalizeSubqueryLateral(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 46 return transform.NodeWithOpaque(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) { 47 if parentSQA, ok := n.(*plan.SubqueryAlias); ok && parentSQA.IsLateral { 48 newSqaChild, sqaSame, sqaErr := transform.NodeWithOpaque(parentSQA.Child, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) { 49 if sqa, ok := n.(*plan.SubqueryAlias); ok { 50 sqa.IsLateral = true 51 return sqa, transform.NewTree, nil 52 } 53 return n, transform.SameTree, nil 54 }) 55 if sqaErr != nil { 56 return n, transform.SameTree, sqaErr 57 } 58 if sqaSame { 59 return n, transform.SameTree, nil 60 } 61 newSqa, err := parentSQA.WithChildren(newSqaChild) 62 if err != nil { 63 return n, transform.SameTree, err 64 } 65 return newSqa, transform.NewTree, nil 66 } 67 return n, transform.SameTree, nil 68 }) 69 } 70 71 // finalizeSubqueries runs the final analysis pass on subquery expressions and subquery aliases in the node tree to ensure 72 // they are fully resolved and that the plan is ready to be executed. The logic is similar to when subqueries are initially 73 // resolved with resolveSubqueries, but with a few important differences: 74 // - finalizeSubqueries processes each subquery once, finalizing parent before child scopes, and should only be included 75 // when analyzing a root node at the top of the plan. 76 // - resolveSubqueries skips pruneColumns and optimizeJoins for subquery expressions and only runs the OnceBeforeDefault 77 // rule set on subquery aliases. 78 // - finalizeSubqueries runs a full analysis pass on subquery expressions and runs all rule batches except for OnceBeforeDefault. 79 func finalizeSubqueries(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 80 span, ctx := ctx.Span("finalize_subqueries") 81 defer span.End() 82 83 node, same1, err := finalizeSubqueriesHelper(ctx, a, n, scope, sel) 84 if err != nil { 85 return nil, transform.SameTree, err 86 } 87 88 newNode, same2, err := finalizeSubqueryLateral(ctx, a, node, scope, sel) 89 if err != nil { 90 return nil, transform.SameTree, err 91 } 92 93 return newNode, same1 && same2, nil 94 } 95 96 // finalizeSubqueriesHelper finalizes all subqueries and subquery expressions, 97 // fixing parent scopes before recursing into child nodes. 98 func finalizeSubqueriesHelper(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 99 var joinParent *plan.JoinNode 100 var selFunc transform.SelectorFunc = func(c transform.Context) bool { 101 if jp, ok := c.Node.(*plan.JoinNode); ok { 102 joinParent = jp 103 } 104 return true 105 } 106 107 var conFunc transform.CtxFunc = func(c transform.Context) (sql.Node, transform.TreeIdentity, error) { 108 n := c.Node 109 if sqa, ok := n.(*plan.SubqueryAlias); ok { 110 var newSqa sql.Node 111 var same2 transform.TreeIdentity 112 var err error 113 // NOTE: this only really fixes one level of subquery with two joins. 114 // This patch will likely not fix cases with more deeply nested joins and subqueries. 115 // A real fix would be to re-examine indexes after everything. 116 if sqa.OuterScopeVisibility && joinParent != nil { 117 if stripChild, ok := joinParent.Right().(*plan.StripRowNode); ok && stripChild.Child == sqa { 118 subScope := scope.NewScopeInJoin(joinParent.Children()[0]) 119 subScope.SetLateralJoin(joinParent.Op.IsLateral()) 120 newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true) 121 } else { 122 // IsLateral means that the subquery should have visibility into the left scope. 123 if sqa.IsLateral { 124 subScope := addLeftTablesToScope(scope, joinParent.Left()) 125 subScope.SetLateralJoin(true) 126 newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true) 127 } else { 128 newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, scope, sel, true) 129 } 130 } 131 } else { 132 // IsLateral means that the subquery should have visibility into the left scope. 133 if joinParent != nil && sqa.IsLateral { 134 subScope := addLeftTablesToScope(scope, joinParent.Left()) 135 subScope.SetLateralJoin(true) 136 newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, true) 137 } else { 138 newSqa, same2, err = analyzeSubqueryAlias(ctx, a, sqa, scope, sel, true) 139 } 140 } 141 142 if err != nil { 143 return n, transform.SameTree, err 144 } 145 146 newNode, same1, err := finalizeSubqueriesHelper(ctx, a, newSqa.(*plan.SubqueryAlias).Child, scope.NewScopeFromSubqueryAlias(sqa), sel) 147 if err != nil { 148 return n, transform.SameTree, err 149 } 150 151 if same1 && same2 { 152 return n, transform.SameTree, nil 153 } else { 154 newNode, err = newSqa.WithChildren(newNode) 155 return newNode, transform.NewTree, err 156 } 157 } 158 return transform.OneNodeExprsWithNode(n, func(node sql.Node, e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 159 if sq, ok := e.(*plan.Subquery); ok { 160 newSq, same2, err := analyzeSubqueryExpression(ctx, a, node, sq, scope, sel, true) 161 if err != nil { 162 if analyzererrors.ErrValidationResolved.Is(err) { 163 // if a parent is unresolved, we want to dig deeper to find the unresolved 164 // child dependency 165 _, _, err := finalizeSubqueriesHelper(ctx, a, sq.Query, scope.NewScopeFromSubqueryExpression(node, sq.Correlated()), sel) 166 if err != nil { 167 return e, transform.SameTree, err 168 } 169 } 170 return e, transform.SameTree, err 171 } 172 newExpr, same1, err := finalizeSubqueriesHelper(ctx, a, newSq.(*plan.Subquery).Query, scope.NewScopeFromSubqueryExpression(node, newSq.(*plan.Subquery).Correlated()), sel) 173 if err != nil { 174 return e, transform.SameTree, err 175 } 176 177 if same1 && same2 { 178 return e, transform.SameTree, nil 179 } else { 180 return newSq.(*plan.Subquery).WithQuery(newExpr), transform.NewTree, nil 181 } 182 } else { 183 return e, transform.SameTree, nil 184 } 185 }) 186 } 187 188 return transform.NodeWithCtx(node, selFunc, conFunc) 189 } 190 191 func resolveSubqueriesHelper(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Node, transform.TreeIdentity, error) { 192 return transform.NodeWithCtx(node, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) { 193 n := c.Node 194 if sqa, ok := n.(*plan.SubqueryAlias); ok { 195 // IsLateral means that the subquery should have visibility into the left scope. 196 if parent, ok := c.Parent.(*plan.JoinNode); ok && sqa.IsLateral { 197 subScope := addLeftTablesToScope(scope, parent.Left()) 198 return analyzeSubqueryAlias(ctx, a, sqa, subScope, sel, finalize) 199 } 200 return analyzeSubqueryAlias(ctx, a, sqa, scope, sel, finalize) 201 } else { 202 return transform.OneNodeExprsWithNode(n, func(node sql.Node, e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 203 if sq, ok := e.(*plan.Subquery); ok { 204 return analyzeSubqueryExpression(ctx, a, n, sq, scope, sel, finalize) 205 } else { 206 return e, transform.SameTree, nil 207 } 208 }) 209 } 210 }) 211 } 212 213 // flattenTableAliases transforms TableAlias nodes that contain a SubqueryAlias or TableAlias node as the immediate 214 // child so that the top level TableAlias is removed and the nested SubqueryAlias or nested TableAlias is the new top 215 // level node, making sure to capture the alias name and transfer it to the new node. The parser doesn't directly 216 // create this nested structure; it occurs as the execution plan is built and altered during analysis, for 217 // example with CTEs that get plugged into the execution plan as the analyzer processes it. 218 func flattenTableAliases(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 219 span, ctx := ctx.Span("flatten_table_aliases") 220 defer span.End() 221 return transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) { 222 switch n := n.(type) { 223 case *plan.TableAlias: 224 if sa, isSA := n.Children()[0].(*plan.SubqueryAlias); isSA { 225 return sa.WithName(n.Name()), transform.NewTree, nil 226 } 227 if ta, isTA := n.Children()[0].(*plan.TableAlias); isTA { 228 return ta.WithName(n.Name()), transform.NewTree, nil 229 } 230 return n, transform.SameTree, nil 231 default: 232 return n, transform.SameTree, nil 233 } 234 }) 235 } 236 237 // analyzeSubqueryExpression runs analysis on the specified subquery expression, |sq|. The specified node |n| is the node 238 // that contains the subquery expression and |finalize| indicates if this is the final run of the analyzer on the query 239 // before execution, which means all analyzer rules are included, otherwise SubqueryExprResolveSelector is used to prevent 240 // running pruneColumns and optimizeJoins for all non-final analysis passes. 241 func analyzeSubqueryExpression(ctx *sql.Context, a *Analyzer, n sql.Node, sq *plan.Subquery, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Expression, transform.TreeIdentity, error) { 242 // We always analyze subquery expressions even if they are resolved, since other transformations to the surrounding 243 // query might cause them to need to shift their field indexes. 244 subqueryCtx, cancelFunc := ctx.NewSubContext() 245 defer cancelFunc() 246 247 var analyzed sql.Node 248 var err error 249 if finalize { 250 analyzed, _, err = a.analyzeStartingAtBatch(subqueryCtx, sq.Query, 251 scope.NewScopeFromSubqueryExpression(n, sq.Correlated()), "default-rules", NewFinalizeSubquerySel(sel)) 252 } else { 253 analyzed, _, err = a.analyzeThroughBatch(subqueryCtx, sq.Query, 254 scope.NewScopeFromSubqueryExpression(n, sq.Correlated()), "default-rules", NewResolveSubqueryExprSelector(sel)) 255 } 256 if err != nil { 257 // We ignore certain errors during non-final passes of the analyzer, deferring them to later analysis passes. 258 // Specifically, if the subquery isn't resolved or a column can't be found in the scope node, wait until a later pass. 259 if !finalize && (analyzererrors.ErrValidationResolved.Is(err) || sql.ErrTableColumnNotFound.Is(err) || sql.ErrColumnNotFound.Is(err)) { 260 // keep the work we have and defer remainder of analysis of this subquery until a later pass 261 return sq.WithQuery(analyzed), transform.NewTree, nil 262 } 263 return nil, transform.SameTree, err 264 } 265 266 //todo(max): Infinite cycles with subqueries, unions, ctes, catalog. 267 // we squashed most negative errors, where a rule fails to report a plan change 268 // to the expense of positive errors, where a rule reports a change when the plan 269 // is the same before/after. 270 // .Resolved() might be useful for fixing these bugs. 271 return sq.WithQuery(StripPassthroughNodes(analyzed)).WithExecBuilder(a.ExecBuilder), transform.NewTree, nil 272 } 273 274 // analyzeSubqueryAlias runs analysis on the specified subquery alias, |sqa|. The |finalize| parameter indicates if this is 275 // the final run of the analyzer on the query before execution, which means all rules, starting from the default-rules 276 // batch are processed, otherwise only the once-before-default batch of rules is processed for all other non-final passes. 277 func analyzeSubqueryAlias(ctx *sql.Context, a *Analyzer, sqa *plan.SubqueryAlias, scope *plan.Scope, sel RuleSelector, finalize bool) (sql.Node, transform.TreeIdentity, error) { 278 subScope := scope.NewScopeFromSubqueryAlias(sqa) 279 280 var child sql.Node 281 var same transform.TreeIdentity 282 var err error 283 if finalize { 284 child, same, err = a.analyzeStartingAtBatch(ctx, sqa.Child, subScope, "default-rules", NewFinalizeSubquerySel(sel)) 285 } else { 286 child, same, err = a.analyzeThroughBatch(ctx, sqa.Child, subScope, "default-rules", sel) 287 } 288 if err != nil { 289 return nil, same, err 290 } 291 292 if len(sqa.ColumnNames) > 0 { 293 schemaLen := schemaLength(child) 294 if schemaLen != len(sqa.ColumnNames) { 295 return nil, transform.SameTree, sql.ErrColumnCountMismatch.New() 296 } 297 } 298 if same { 299 return sqa, transform.SameTree, nil 300 } 301 newn, err := sqa.WithChildren(StripPassthroughNodes(child)) 302 return newn, transform.NewTree, err 303 } 304 305 // StripPassthroughNodes strips all top-level passthrough nodes meant to apply only to top-level queries (query 306 // tracking, transaction logic, etc) from the node tree given and return the first non-passthrough child element. This 307 // is useful for when we invoke the analyzer recursively when e.g. analyzing subqueries or triggers 308 // TODO: instead of stripping this node off after analysis, it would be better to just not add it in the first place. 309 func StripPassthroughNodes(n sql.Node) sql.Node { 310 nodeIsPassthrough := true 311 for nodeIsPassthrough { 312 switch tn := n.(type) { 313 case *plan.QueryProcess: 314 n = tn.Child() 315 case *plan.TransactionCommittingNode: 316 n = tn.Child() 317 default: 318 nodeIsPassthrough = false 319 } 320 } 321 322 return n 323 } 324 325 // cacheSubqueryAlisesInJoins will look for joins against subquery aliases that 326 // will repeatedly execute the subquery, and will insert a *plan.CachedResults 327 // node on top of those nodes. The left-most child of a join root is an exception 328 // that cannot be cached. 329 func cacheSubqueryAliasesInJoins(ctx *sql.Context, a *Analyzer, n sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 330 var recurse func(n sql.Node, parentCached, inJoin, rootJoinT1 bool) (sql.Node, transform.TreeIdentity, error) 331 recurse = func(n sql.Node, parentCached, inJoin, foundFirstRel bool) (sql.Node, transform.TreeIdentity, error) { 332 _, isOp := n.(sql.OpaqueNode) 333 var isCacheableSq bool 334 var isCachedRs bool 335 var isMax1Row bool 336 switch n := n.(type) { 337 case *plan.JoinNode: 338 if !inJoin { 339 inJoin = true 340 foundFirstRel = false 341 } 342 case *plan.SubqueryAlias: 343 isCacheableSq = n.CanCacheResults() 344 case *plan.CachedResults: 345 isCachedRs = true 346 case *plan.Max1Row: 347 isMax1Row = true 348 default: 349 350 } 351 352 doCache := isCacheableSq && inJoin && !parentCached 353 childInJoin := inJoin && !isOp 354 355 if inJoin && !foundFirstRel { 356 switch n.(type) { 357 case sql.Nameable: 358 doCache = false 359 foundFirstRel = true 360 default: 361 } 362 } 363 364 children := n.Children() 365 var newChildren []sql.Node 366 for i, c := range children { 367 child, same, _ := recurse(c, doCache || isCachedRs || isMax1Row, childInJoin, foundFirstRel) 368 if !same { 369 if newChildren == nil { 370 newChildren = make([]sql.Node, len(children)) 371 copy(newChildren, children) 372 } 373 newChildren[i] = child 374 } 375 } 376 377 if len(newChildren) == 0 && !doCache { 378 return n, transform.SameTree, nil 379 } 380 381 ret := n 382 if len(newChildren) > 0 { 383 ret, _ = ret.WithChildren(newChildren...) 384 } 385 if doCache { 386 ret = plan.NewCachedResults(n) 387 } 388 return ret, transform.NewTree, nil 389 } 390 return recurse(n, false, false, false) 391 }