vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/operators/subquery_planning.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package operators 18 19 import ( 20 "vitess.io/vitess/go/vt/sqlparser" 21 "vitess.io/vitess/go/vt/vterrors" 22 "vitess.io/vitess/go/vt/vtgate/engine" 23 "vitess.io/vitess/go/vt/vtgate/evalengine" 24 "vitess.io/vitess/go/vt/vtgate/planbuilder/operators/ops" 25 "vitess.io/vitess/go/vt/vtgate/planbuilder/operators/rewrite" 26 "vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext" 27 ) 28 29 func optimizeSubQuery(ctx *plancontext.PlanningContext, op *SubQuery) (ops.Operator, rewrite.TreeIdentity, error) { 30 var unmerged []*SubQueryOp 31 32 // first loop over the subqueries and try to merge them into the outer plan 33 outer := op.Outer 34 for _, inner := range op.Inner { 35 innerOp := inner.Inner 36 37 var preds []sqlparser.Expr 38 preds, innerOp = unresolvedAndSource(ctx, innerOp) 39 merger := func(a, b *Route) (*Route, error) { 40 return mergeSubQueryOp(ctx, a, b, inner) 41 } 42 43 newInner := &SubQueryInner{ 44 Inner: inner.Inner, 45 ExtractedSubquery: inner.ExtractedSubquery, 46 } 47 merged, err := tryMergeSubQueryOp(ctx, outer, innerOp, newInner, preds, merger) 48 if err != nil { 49 return nil, rewrite.SameTree, err 50 } 51 52 if merged != nil { 53 outer = merged 54 continue 55 } 56 57 if len(preds) == 0 { 58 // uncorrelated queries 59 sq := &SubQueryOp{ 60 Extracted: inner.ExtractedSubquery, 61 Inner: innerOp, 62 } 63 unmerged = append(unmerged, sq) 64 continue 65 } 66 67 if inner.ExtractedSubquery.OpCode == int(engine.PulloutExists) { 68 correlatedTree, err := createCorrelatedSubqueryOp(ctx, innerOp, outer, preds, inner.ExtractedSubquery) 69 if err != nil { 70 return nil, rewrite.SameTree, err 71 } 72 outer = correlatedTree 73 continue 74 } 75 76 return nil, rewrite.SameTree, vterrors.VT12001("cross-shard correlated subquery") 77 } 78 79 for _, tree := range unmerged { 80 tree.Outer = outer 81 outer = tree 82 } 83 return outer, rewrite.NewTree, nil 84 } 85 86 func unresolvedAndSource(ctx *plancontext.PlanningContext, op ops.Operator) ([]sqlparser.Expr, ops.Operator) { 87 preds := UnresolvedPredicates(op, ctx.SemTable) 88 if filter, ok := op.(*Filter); ok { 89 if ctx.SemTable.ASTEquals().Exprs(preds, filter.Predicates) { 90 // if we are seeing a single filter with only these predicates, 91 // we can throw away the filter and just use the source 92 return preds, filter.Source 93 } 94 } 95 96 return preds, op 97 } 98 99 func mergeSubQueryOp(ctx *plancontext.PlanningContext, outer *Route, inner *Route, subq *SubQueryInner) (*Route, error) { 100 subq.ExtractedSubquery.NeedsRewrite = true 101 outer.SysTableTableSchema = append(outer.SysTableTableSchema, inner.SysTableTableSchema...) 102 for k, v := range inner.SysTableTableName { 103 if outer.SysTableTableName == nil { 104 outer.SysTableTableName = map[string]evalengine.Expr{} 105 } 106 outer.SysTableTableName[k] = v 107 } 108 109 // When merging an inner query with its outer query, we can remove the 110 // inner query from the list of predicates that can influence routing of 111 // the outer query. 112 // 113 // Note that not all inner queries necessarily are part of the routing 114 // predicates list, so this might be a no-op. 115 subQueryWasPredicate := false 116 for i, predicate := range outer.SeenPredicates { 117 if ctx.SemTable.EqualsExpr(predicate, subq.ExtractedSubquery) { 118 outer.SeenPredicates = append(outer.SeenPredicates[:i], outer.SeenPredicates[i+1:]...) 119 120 subQueryWasPredicate = true 121 122 // The `ExtractedSubquery` of an inner query is unique (due to the uniqueness of bind variable names) 123 // so we can stop after the first match. 124 break 125 } 126 } 127 128 err := outer.resetRoutingSelections(ctx) 129 if err != nil { 130 return nil, err 131 } 132 133 if subQueryWasPredicate { 134 // Copy Vindex predicates from the inner route to the upper route. 135 // If we can route based on some of these predicates, the routing can improve 136 outer.VindexPreds = append(outer.VindexPreds, inner.VindexPreds...) 137 138 if inner.RouteOpCode == engine.None { 139 outer.setSelectNoneOpcode() 140 } 141 } 142 143 outer.MergedWith = append(outer.MergedWith, inner) 144 145 return outer, nil 146 } 147 148 func isMergeable(ctx *plancontext.PlanningContext, query sqlparser.SelectStatement, op ops.Operator) bool { 149 validVindex := func(expr sqlparser.Expr) bool { 150 sc := findColumnVindex(ctx, op, expr) 151 return sc != nil && sc.IsUnique() 152 } 153 154 if query.GetLimit() != nil { 155 return false 156 } 157 158 sel, ok := query.(*sqlparser.Select) 159 if !ok { 160 return false 161 } 162 163 if len(sel.GroupBy) > 0 { 164 // iff we are grouping, we need to check that we can perform the grouping inside a single shard, and we check that 165 // by checking that one of the grouping expressions used is a unique single column vindex. 166 // TODO: we could also support the case where all the columns of a multi-column vindex are used in the grouping 167 for _, gb := range sel.GroupBy { 168 if validVindex(gb) { 169 return true 170 } 171 } 172 return false 173 } 174 175 // if we have grouping, we have already checked that it's safe, and don't need to check for aggregations 176 // but if we don't have groupings, we need to check if there are aggregations that will mess with us 177 if sqlparser.ContainsAggregation(sel.SelectExprs) { 178 return false 179 } 180 181 if sqlparser.ContainsAggregation(sel.Having) { 182 return false 183 } 184 185 return true 186 } 187 188 func tryMergeSubQueryOp( 189 ctx *plancontext.PlanningContext, 190 outer, subq ops.Operator, 191 subQueryInner *SubQueryInner, 192 joinPredicates []sqlparser.Expr, 193 merger mergeFunc, 194 ) (ops.Operator, error) { 195 switch outerOp := outer.(type) { 196 case *Filter: 197 op, err := tryMergeSubQueryOp(ctx, outerOp.Source, subq, subQueryInner, joinPredicates, merger) 198 if err != nil || op == nil { 199 return nil, err 200 } 201 outerOp.Source = op 202 return outerOp, nil 203 case *Route: 204 return tryMergeSubqueryWithRoute(ctx, subq, outerOp, joinPredicates, merger, subQueryInner) 205 case *ApplyJoin: 206 return tryMergeSubqueryWithJoin(ctx, subq, outerOp, joinPredicates, merger, subQueryInner) 207 default: 208 return nil, nil 209 } 210 } 211 212 func tryMergeSubqueryWithRoute( 213 ctx *plancontext.PlanningContext, 214 subq ops.Operator, 215 outerOp *Route, 216 joinPredicates []sqlparser.Expr, 217 merger mergeFunc, 218 subQueryInner *SubQueryInner, 219 ) (ops.Operator, error) { 220 subqueryRoute, isRoute := subq.(*Route) 221 if !isRoute { 222 return nil, nil 223 } 224 225 if outerOp.RouteOpCode == engine.Reference && !subqueryRoute.IsSingleShard() { 226 return nil, nil 227 } 228 229 merged, err := tryMerge(ctx, outerOp, subq, joinPredicates, merger) 230 if err != nil { 231 return nil, err 232 } 233 234 // If the subqueries could be merged here, we're done 235 if merged != nil { 236 return merged, err 237 } 238 239 if !isMergeable(ctx, subQueryInner.ExtractedSubquery.Subquery.Select, subq) { 240 return nil, nil 241 } 242 243 // Special case: Inner query won't return any results / is not routable. 244 if subqueryRoute.RouteOpCode == engine.None { 245 merged, err := merger(outerOp, subqueryRoute) 246 if err != nil { 247 return nil, err 248 } 249 return merged, err 250 } 251 252 // Inner subqueries can be merged with the outer subquery as long as 253 // the inner query is a single column selection, and that single column has a matching 254 // vindex on the outer query's operand. 255 if canMergeSubqueryOnColumnSelection(ctx, outerOp, subqueryRoute, subQueryInner.ExtractedSubquery) { 256 merged, err := merger(outerOp, subqueryRoute) 257 258 if err != nil { 259 return nil, err 260 } 261 262 if merged != nil { 263 // since we inlined the subquery into the outer query, new vindex options might have been enabled, 264 // so we go over our current options to check if anything better has come up. 265 merged.PickBestAvailableVindex() 266 return merged, err 267 } 268 } 269 return nil, nil 270 } 271 272 func tryMergeSubqueryWithJoin( 273 ctx *plancontext.PlanningContext, 274 subq ops.Operator, 275 outerOp *ApplyJoin, 276 joinPredicates []sqlparser.Expr, 277 merger mergeFunc, 278 subQueryInner *SubQueryInner, 279 ) (ops.PhysicalOperator, error) { 280 // Trying to merge the subquery with the left-hand or right-hand side of the join 281 282 if outerOp.LeftJoin { 283 return nil, nil 284 } 285 newMergefunc := func(a, b *Route) (*Route, error) { 286 rt, err := merger(a, b) 287 if err != nil { 288 return nil, err 289 } 290 outerOp.RHS, err = rewriteColumnsInSubqueryOpForJoin(ctx, outerOp.RHS, outerOp, subQueryInner) 291 return rt, err 292 } 293 merged, err := tryMergeSubQueryOp(ctx, outerOp.LHS, subq, subQueryInner, joinPredicates, newMergefunc) 294 if err != nil { 295 return nil, err 296 } 297 if merged != nil { 298 outerOp.LHS = merged 299 return outerOp, nil 300 } 301 302 newMergefunc = func(a, b *Route) (*Route, error) { 303 rt, err := merger(a, b) 304 if err != nil { 305 return nil, err 306 } 307 outerOp.LHS, err = rewriteColumnsInSubqueryOpForJoin(ctx, outerOp.LHS, outerOp, subQueryInner) 308 return rt, err 309 } 310 merged, err = tryMergeSubQueryOp(ctx, outerOp.RHS, subq, subQueryInner, joinPredicates, newMergefunc) 311 if err != nil { 312 return nil, err 313 } 314 if merged != nil { 315 outerOp.RHS = merged 316 return outerOp, nil 317 } 318 return nil, nil 319 } 320 321 // rewriteColumnsInSubqueryOpForJoin rewrites the columns that appear from the other side 322 // of the join. For example, let's say we merged a subquery on the right side of a join tree 323 // If it was using any columns from the left side then they need to be replaced by bind variables supplied 324 // from that side. 325 // outerTree is the joinTree within whose children the subquery lives in 326 // the child of joinTree which does not contain the subquery is the otherTree 327 func rewriteColumnsInSubqueryOpForJoin( 328 ctx *plancontext.PlanningContext, 329 innerOp ops.Operator, 330 outerTree *ApplyJoin, 331 subQueryInner *SubQueryInner, 332 ) (ops.Operator, error) { 333 resultInnerOp := innerOp 334 var rewriteError error 335 // go over the entire expression in the subquery 336 sqlparser.SafeRewrite(subQueryInner.ExtractedSubquery.Original, nil, func(cursor *sqlparser.Cursor) bool { 337 node, ok := cursor.Node().(*sqlparser.ColName) 338 if !ok { 339 return true 340 } 341 342 // check whether the column name belongs to the other side of the join tree 343 if !ctx.SemTable.RecursiveDeps(node).IsSolvedBy(TableID(resultInnerOp)) { 344 return true 345 } 346 347 // get the bindVariable for that column name and replace it in the subquery 348 bindVar := ctx.ReservedVars.ReserveColName(node) 349 cursor.Replace(sqlparser.NewArgument(bindVar)) 350 // check whether the bindVariable already exists in the joinVars of the other tree 351 _, alreadyExists := outerTree.Vars[bindVar] 352 if alreadyExists { 353 return true 354 } 355 // if it does not exist, then push this as an output column there and add it to the joinVars 356 offset, err := resultInnerOp.AddColumn(ctx, node) 357 if err != nil { 358 rewriteError = err 359 return false 360 } 361 outerTree.Vars[bindVar] = offset 362 return true 363 }) 364 365 // update the dependencies for the subquery by removing the dependencies from the innerOp 366 tableSet := ctx.SemTable.Direct[subQueryInner.ExtractedSubquery.Subquery] 367 ctx.SemTable.Direct[subQueryInner.ExtractedSubquery.Subquery] = tableSet.Remove(TableID(resultInnerOp)) 368 tableSet = ctx.SemTable.Recursive[subQueryInner.ExtractedSubquery.Subquery] 369 ctx.SemTable.Recursive[subQueryInner.ExtractedSubquery.Subquery] = tableSet.Remove(TableID(resultInnerOp)) 370 371 // return any error while rewriting 372 return resultInnerOp, rewriteError 373 } 374 375 func createCorrelatedSubqueryOp( 376 ctx *plancontext.PlanningContext, 377 innerOp, outerOp ops.Operator, 378 preds []sqlparser.Expr, 379 extractedSubquery *sqlparser.ExtractedSubquery, 380 ) (*CorrelatedSubQueryOp, error) { 381 newOuter, err := RemovePredicate(ctx, extractedSubquery, outerOp) 382 if err != nil { 383 return nil, vterrors.VT12001("EXISTS sub-queries are only supported with AND clause") 384 } 385 386 resultOuterOp := newOuter 387 vars := map[string]int{} 388 bindVars := map[*sqlparser.ColName]string{} 389 var lhsCols []*sqlparser.ColName 390 for _, pred := range preds { 391 var rewriteError error 392 sqlparser.SafeRewrite(pred, nil, func(cursor *sqlparser.Cursor) bool { 393 node, ok := cursor.Node().(*sqlparser.ColName) 394 if !ok { 395 return true 396 } 397 398 nodeDeps := ctx.SemTable.RecursiveDeps(node) 399 if !nodeDeps.IsSolvedBy(TableID(resultOuterOp)) { 400 return true 401 } 402 403 // check whether the bindVariable already exists in the map 404 // we do so by checking that the column names are the same and their recursive dependencies are the same 405 // so the column names `user.a` and `a` would be considered equal as long as both are bound to the same table 406 for colName, bindVar := range bindVars { 407 if ctx.SemTable.EqualsExpr(node, colName) { 408 cursor.Replace(sqlparser.NewArgument(bindVar)) 409 return true 410 } 411 } 412 413 // get the bindVariable for that column name and replace it in the predicate 414 bindVar := ctx.ReservedVars.ReserveColName(node) 415 cursor.Replace(sqlparser.NewArgument(bindVar)) 416 // store it in the map for future comparisons 417 bindVars[node] = bindVar 418 419 // if it does not exist, then push this as an output column in the outerOp and add it to the joinVars 420 offset, err := resultOuterOp.AddColumn(ctx, node) 421 if err != nil { 422 rewriteError = err 423 return true 424 } 425 lhsCols = append(lhsCols, node) 426 vars[bindVar] = offset 427 return true 428 }) 429 if rewriteError != nil { 430 return nil, rewriteError 431 } 432 var err error 433 innerOp, err = innerOp.AddPredicate(ctx, pred) 434 if err != nil { 435 return nil, err 436 } 437 } 438 return &CorrelatedSubQueryOp{ 439 Outer: resultOuterOp, 440 Inner: innerOp, 441 Extracted: extractedSubquery, 442 Vars: vars, 443 LHSColumns: lhsCols, 444 }, nil 445 } 446 447 // canMergeSubqueryOnColumnSelection will return true if the predicate used allows us to merge the two subqueries 448 // into a single Route. This can be done if we are comparing two columns that contain data that is guaranteed 449 // to exist on the same shard. 450 func canMergeSubqueryOnColumnSelection(ctx *plancontext.PlanningContext, a, b *Route, predicate *sqlparser.ExtractedSubquery) bool { 451 left := predicate.OtherSide 452 opCode := predicate.OpCode 453 if opCode != int(engine.PulloutValue) && opCode != int(engine.PulloutIn) { 454 return false 455 } 456 457 lVindex := findColumnVindex(ctx, a, left) 458 if lVindex == nil || !lVindex.IsUnique() { 459 return false 460 } 461 462 rightSelection := extractSingleColumnSubquerySelection(predicate.Subquery) 463 if rightSelection == nil { 464 return false 465 } 466 467 rVindex := findColumnVindex(ctx, b, rightSelection) 468 if rVindex == nil { 469 return false 470 } 471 return rVindex == lVindex 472 } 473 474 // Searches for the single column returned from a subquery, like the `col` in `(SELECT col FROM tbl)` 475 func extractSingleColumnSubquerySelection(subquery *sqlparser.Subquery) *sqlparser.ColName { 476 if subquery.Select.GetColumnCount() != 1 { 477 return nil 478 } 479 480 columnExpr := subquery.Select.GetColumns()[0] 481 482 aliasedExpr, ok := columnExpr.(*sqlparser.AliasedExpr) 483 if !ok { 484 return nil 485 } 486 487 return getColName(aliasedExpr.Expr) 488 }