vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/aggregation_pushing.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package planbuilder 18 19 import ( 20 "fmt" 21 "strconv" 22 23 "vitess.io/vitess/go/vt/sqlparser" 24 "vitess.io/vitess/go/vt/vterrors" 25 "vitess.io/vitess/go/vt/vtgate/engine" 26 "vitess.io/vitess/go/vt/vtgate/planbuilder/operators" 27 "vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext" 28 ) 29 30 // pushAggregation pushes grouping and aggregation as far down in the tree as possible 31 // the output `outputAggrsOffset` needs a little explaining: this is the offsets for aggregation - remember 32 // that aggregation can be broken down into multiple expressions that are later combined. 33 // this is why this output is a slice of slices 34 func (hp *horizonPlanning) pushAggregation( 35 ctx *plancontext.PlanningContext, 36 plan logicalPlan, 37 grouping []operators.GroupBy, 38 aggregations []operators.Aggr, 39 ignoreOutputOrder bool, 40 ) (output logicalPlan, 41 groupingOffsets []offsets, 42 outputAggrsOffset [][]offsets, 43 pushed bool, 44 err error) { 45 pushed = true 46 switch plan := plan.(type) { 47 case *routeGen4: 48 output = plan 49 groupingOffsets, outputAggrsOffset, _, err = pushAggrOnRoute(ctx, plan, aggregations, grouping, ignoreOutputOrder) 50 return 51 52 case *joinGen4: 53 output = plan 54 groupingOffsets, outputAggrsOffset, err = hp.pushAggrOnJoin(ctx, plan, grouping, aggregations) 55 return 56 57 case *semiJoin: 58 output = plan 59 groupingOffsets, outputAggrsOffset, pushed, err = hp.pushAggrOnSemiJoin(ctx, plan, grouping, aggregations, ignoreOutputOrder) 60 return 61 62 case *simpleProjection: 63 // we just remove the simpleProjection. We are doing an OA on top anyway, so no need to clean up the output columns 64 return hp.pushAggregation(ctx, plan.input, grouping, aggregations, ignoreOutputOrder) 65 66 case *limit: 67 // if we are seeing a limit, it's because we are building on top of a derived table. 68 output = plan 69 pushed = false 70 71 for _, grp := range grouping { 72 offset, wOffset, err := wrapAndPushExpr(ctx, grp.Inner, grp.WeightStrExpr, plan.input) 73 if err != nil { 74 return nil, nil, nil, false, err 75 } 76 groupingOffsets = append(groupingOffsets, offsets{ 77 col: offset, 78 wsCol: wOffset, 79 }) 80 } 81 82 for _, aggr := range aggregations { 83 var offset int 84 aggrExpr, ok := aggr.Original.Expr.(sqlparser.AggrFunc) 85 if !ok { 86 return nil, nil, nil, false, vterrors.VT13001(fmt.Sprintf("unexpected expression: %v", aggr.Original)) 87 } 88 89 switch aggrExpr.(type) { 90 case *sqlparser.CountStar: 91 offset = 0 92 default: 93 if len(aggrExpr.GetArgs()) != 1 { 94 return nil, nil, nil, false, vterrors.VT13001(fmt.Sprintf("unexpected expression: %v", aggrExpr)) 95 } 96 offset, _, err = pushProjection(ctx, &sqlparser.AliasedExpr{Expr: aggrExpr.GetArg() /*As: expr.As*/}, plan.input, true, true, false) 97 } 98 99 if err != nil { 100 return nil, nil, nil, false, err 101 } 102 103 outputAggrsOffset = append(outputAggrsOffset, []offsets{newOffset(offset)}) 104 } 105 106 return 107 default: 108 err = vterrors.VT12001(fmt.Sprintf("using aggregation on top of a %T plan", plan)) 109 return 110 } 111 } 112 113 func pushAggrOnRoute( 114 ctx *plancontext.PlanningContext, 115 plan *routeGen4, 116 aggregations []operators.Aggr, 117 grouping []operators.GroupBy, 118 ignoreOutputOrder bool, 119 ) ( 120 groupingOffsets []offsets, 121 vtgateAggregation [][]offsets, 122 nonAggrOffsets []offsets, 123 err error, 124 ) { 125 columnOrderMatters := !ignoreOutputOrder 126 sel, isSel := plan.Select.(*sqlparser.Select) 127 if !isSel { 128 return nil, nil, nil, vterrors.VT12001("plan aggregation on union") 129 } 130 131 var groupingCols []int 132 var reorg = passThrough 133 134 if columnOrderMatters { 135 // During this first run, we push the projections for the normal columns (not the weigh_string ones, that is) 136 // in the order that the user asked for it 137 // sortOffsets also returns a reorgFunc, 138 // that can be used to rearrange the produced outputs to the original order 139 var it *sortedIterator 140 var err error 141 grouping, reorg, it = sortOffsets(grouping, aggregations) 142 vtgateAggregation, groupingCols, err = pushAggrsAndGroupingInOrder(ctx, plan, it, sel, vtgateAggregation, groupingCols) 143 if err != nil { 144 return nil, nil, nil, err 145 } 146 } else { 147 // if we haven't already pushed the aggregations, now is the time 148 for _, aggregation := range aggregations { 149 param := addAggregationToSelect(ctx, sel, aggregation) 150 vtgateAggregation = append(vtgateAggregation, []offsets{param}) 151 } 152 } 153 154 groupingOffsets = make([]offsets, 0, len(grouping)) 155 for idx, expr := range grouping { 156 sel.AddGroupBy(expr.Inner) 157 var pos offsets 158 if ignoreOutputOrder { 159 // we have not yet pushed anything, so we need to push the expression first 160 col, _, err := addExpressionToRoute(ctx, plan, &sqlparser.AliasedExpr{Expr: expr.Inner}, true) 161 if err != nil { 162 return nil, nil, nil, err 163 } 164 pos = newOffset(col) 165 } else { 166 pos = newOffset(groupingCols[idx]) 167 } 168 169 if expr.WeightStrExpr != nil && ctx.SemTable.NeedsWeightString(expr.Inner) { 170 wsExpr := weightStringFor(expr.WeightStrExpr) 171 wsCol, _, err := addExpressionToRoute(ctx, plan, &sqlparser.AliasedExpr{Expr: wsExpr}, true) 172 if err != nil { 173 return nil, nil, nil, err 174 } 175 pos.wsCol = wsCol 176 sel.AddGroupBy(wsExpr) 177 } 178 groupingOffsets = append(groupingOffsets, pos) 179 } 180 181 groupingOffsets, vtgateAggregation = reorg(groupingOffsets, vtgateAggregation) 182 return groupingOffsets, vtgateAggregation, nil, nil 183 } 184 185 func pushAggrsAndGroupingInOrder( 186 ctx *plancontext.PlanningContext, 187 plan *routeGen4, 188 it *sortedIterator, 189 sel *sqlparser.Select, 190 vtgateAggregation [][]offsets, 191 groupingCols []int, 192 ) ([][]offsets, []int, error) { 193 for it.next() { 194 groupBy, aggregation := it.current() 195 if aggregation != nil { 196 param := addAggregationToSelect(ctx, sel, *aggregation) 197 vtgateAggregation = append(vtgateAggregation, []offsets{param}) 198 continue 199 } 200 if groupBy != nil { 201 reuseCol := groupBy.InnerIndex == nil 202 col, _, err := addExpressionToRoute(ctx, plan, groupBy.AsAliasedExpr(), reuseCol) 203 groupingCols = append(groupingCols, col) 204 if err != nil { 205 return nil, nil, err 206 } 207 } 208 } 209 return vtgateAggregation, groupingCols, nil 210 } 211 212 // addAggregationToSelect adds the aggregation to the SELECT statement and returns the AggregateParams to be used outside 213 func addAggregationToSelect(ctx *plancontext.PlanningContext, sel *sqlparser.Select, aggregation operators.Aggr) offsets { 214 // TODO: removing duplicated aggregation expression should also be done at the join level 215 for i, expr := range sel.SelectExprs { 216 aliasedExpr, isAliasedExpr := expr.(*sqlparser.AliasedExpr) 217 if !isAliasedExpr { 218 continue 219 } 220 if ctx.SemTable.EqualsExpr(aliasedExpr.Expr, aggregation.Original.Expr) { 221 return newOffset(i) 222 } 223 } 224 225 sel.SelectExprs = append(sel.SelectExprs, aggregation.Original) 226 return newOffset(len(sel.SelectExprs) - 1) 227 } 228 229 func countStarAggr() *operators.Aggr { 230 f := &sqlparser.CountStar{} 231 232 return &operators.Aggr{ 233 Original: &sqlparser.AliasedExpr{Expr: f}, 234 OpCode: engine.AggregateCountStar, 235 Alias: "count(*)", 236 } 237 } 238 239 /* 240 We push down aggregations using the logic from the paper Orthogonal Optimization of Subqueries and Aggregation, by 241 Cesar A. Galindo-Legaria and Milind M. Joshi from Microsoft Corp. 242 243 It explains how one can split an aggregation into local aggregates that depend on only one side of the join. 244 The local aggregates can then be gathered together to produce the global 245 group by/aggregate query that the user asked for. 246 247 In Vitess, this is particularly useful because it allows us to push aggregation down to the routes, even when 248 we have to join the results at the vtgate level. Instead of doing all the grouping and aggregation at the 249 vtgate level, we can offload most of the work to MySQL, and at the vtgate just summarize the results. 250 */ 251 func (hp *horizonPlanning) pushAggrOnJoin( 252 ctx *plancontext.PlanningContext, 253 join *joinGen4, 254 grouping []operators.GroupBy, 255 aggregations []operators.Aggr, 256 ) ([]offsets, [][]offsets, error) { 257 // First we separate aggregations according to which side the dependencies are coming from 258 lhsAggrs, rhsAggrs, err := splitAggregationsToLeftAndRight(ctx, aggregations, join) 259 if err != nil { 260 return nil, nil, err 261 } 262 263 // We need to group by the columns used in the join condition. 264 // If we don't, the LHS will not be able to return the column, and it can't be used to send down to the RHS 265 lhsCols, err := hp.createGroupingsForColumns(join.LHSColumns) 266 if err != nil { 267 return nil, nil, err 268 } 269 270 // Here we split the grouping depending on if they should with the LHS or RHS of the query 271 // This is done by using the semantic table and checking dependencies 272 lhsGrouping, rhsGrouping, groupingOffsets, err := splitGroupingsToLeftAndRight(ctx, join, grouping, lhsCols) 273 if err != nil { 274 return nil, nil, err 275 } 276 277 // If the rhs has no grouping column then a count(*) will return 0 from the query and will get mapped to the record from left hand side. 278 // This is an incorrect behaviour as the join condition has not matched, so we add a literal 1 to the select query and also group by on it. 279 // So that only if join condition matches the records will be mapped and returned. 280 if len(rhsGrouping) == 0 && len(rhsAggrs) != 0 { 281 l := sqlparser.NewIntLiteral("1") 282 aExpr := &sqlparser.AliasedExpr{ 283 Expr: l, 284 } 285 offset, _, err := pushProjection(ctx, aExpr, join.Right, true, true, false) 286 if err != nil { 287 return nil, nil, err 288 } 289 l = sqlparser.NewIntLiteral(strconv.Itoa(offset + 1)) 290 rhsGrouping = append(rhsGrouping, operators.GroupBy{Inner: l}) 291 } 292 293 // Next we push the aggregations to both sides 294 newLHS, lhsOffsets, lhsAggrOffsets, _, err := hp.filteredPushAggregation(ctx, join.Left, lhsGrouping, lhsAggrs, true) 295 if err != nil { 296 return nil, nil, err 297 } 298 299 newRHS, rhsOffsets, rhsAggrOffsets, _, err := hp.filteredPushAggregation(ctx, join.Right, rhsGrouping, rhsAggrs, true) 300 if err != nil { 301 return nil, nil, err 302 } 303 join.Left, join.Right = newLHS, newRHS 304 305 // Next, we have to pass through the grouping values through the join and the projection we add on top 306 // We added new groupings to the LHS because of the join condition, so we don't want to pass through everything, 307 // just the groupings that are used by operators on top of this current one 308 wsOutputGrpOffset := len(groupingOffsets) + len(join.Cols) 309 outputGroupings := make([]offsets, 0, len(groupingOffsets)) 310 var wsOffsets []int 311 for _, groupBy := range groupingOffsets { 312 var offset offsets 313 var f func(i int) int 314 if groupBy < 0 { 315 offset = lhsOffsets[-groupBy-1] 316 f = func(i int) int { return -(i + 1) } 317 } else { 318 offset = rhsOffsets[groupBy-1] 319 f = func(i int) int { return i + 1 } 320 } 321 outputGrouping := newOffset(len(join.Cols)) 322 join.Cols = append(join.Cols, f(offset.col)) 323 if offset.wsCol > -1 { 324 // we add the weight_string calls at the end of the join columns 325 outputGrouping.wsCol = wsOutputGrpOffset + len(wsOffsets) 326 wsOffsets = append(wsOffsets, f(offset.wsCol)) 327 } 328 outputGroupings = append(outputGroupings, outputGrouping) 329 } 330 join.Cols = append(join.Cols, wsOffsets...) 331 332 outputAggrOffsets := make([][]offsets, 0, len(aggregations)) 333 for idx := range aggregations { 334 l, r := lhsAggrOffsets[idx], rhsAggrOffsets[idx] 335 var offSlice []offsets 336 for _, off := range l { 337 offSlice = append(offSlice, newOffset(len(join.Cols))) 338 join.Cols = append(join.Cols, -(off.col + 1)) 339 } 340 for _, off := range r { 341 offSlice = append(offSlice, newOffset(len(join.Cols))) 342 join.Cols = append(join.Cols, off.col+1) 343 } 344 outputAggrOffsets = append(outputAggrOffsets, offSlice) 345 } 346 return outputGroupings, outputAggrOffsets, err 347 } 348 349 /* 350 pushAggrOnSemiJoin works similarly to pushAggrOnJoin, but it's simpler, because we don't get any inputs from the RHS, 351 so there are no aggregations or groupings that have to be sent to the RHS 352 353 We do however need to add the columns used in the subquery coming from the LHS to the grouping. 354 That way we get the aggregation grouped by the column we need to use to decide if the row should 355 */ 356 func (hp *horizonPlanning) pushAggrOnSemiJoin( 357 ctx *plancontext.PlanningContext, 358 join *semiJoin, 359 grouping []operators.GroupBy, 360 aggregations []operators.Aggr, 361 ignoreOutputOrder bool, 362 ) ([]offsets, [][]offsets, bool, error) { 363 // We need to group by the columns used in the join condition. 364 // If we don't, the LHS will not be able to return the column, and it can't be used to send down to the RHS 365 lhsCols, err := hp.createGroupingsForColumns(join.LHSColumns) 366 if err != nil { 367 return nil, nil, false, err 368 } 369 370 totalGrouping := append(grouping, lhsCols...) 371 newLeft, groupingOffsets, aggrParams, pushed, err := hp.pushAggregation(ctx, join.lhs, totalGrouping, aggregations, ignoreOutputOrder) 372 if err != nil { 373 return nil, nil, false, err 374 } 375 join.lhs = newLeft 376 377 outputGroupings := make([]offsets, 0, len(grouping)) 378 for idx := range grouping { 379 outputGroupings = append(outputGroupings, groupingOffsets[idx]) 380 } 381 382 return outputGroupings, aggrParams, pushed, nil 383 } 384 385 // this method takes a slice of aggregations that can have missing spots in the form of `nil`, 386 // and pushes the non-empty values down. 387 // during aggregation planning, it's important to know which of 388 // the incoming aggregations correspond to what is sent to the LHS and RHS. 389 // Some aggregations only need to be sent to one of the sides of the join, and in that case, 390 // the other side will have a nil in this offset of the aggregations 391 func (hp *horizonPlanning) filteredPushAggregation( 392 ctx *plancontext.PlanningContext, 393 plan logicalPlan, 394 grouping []operators.GroupBy, 395 aggregations []*operators.Aggr, 396 ignoreOutputOrder bool, 397 ) (out logicalPlan, groupingOffsets []offsets, outputAggrs [][]offsets, pushed bool, err error) { 398 used := make([]bool, len(aggregations)) 399 var aggrs []operators.Aggr 400 401 for idx, aggr := range aggregations { 402 if aggr != nil { 403 used[idx] = true 404 aggrs = append(aggrs, *aggr) 405 } 406 } 407 newplan, groupingOffsets, pushedAggrs, pushed, err := hp.pushAggregation(ctx, plan, grouping, aggrs, ignoreOutputOrder) 408 if err != nil { 409 return nil, nil, nil, pushed, err 410 } 411 idx := 0 412 for _, b := range used { 413 if !b { 414 outputAggrs = append(outputAggrs, nil) 415 continue 416 } 417 outputAggrs = append(outputAggrs, pushedAggrs[idx]) 418 idx++ 419 } 420 return newplan, groupingOffsets, outputAggrs, pushed, nil 421 } 422 423 func isMinOrMax(in engine.AggregateOpcode) bool { 424 switch in { 425 case engine.AggregateMin, engine.AggregateMax: 426 return true 427 default: 428 return false 429 } 430 } 431 432 func isRandom(in engine.AggregateOpcode) bool { 433 return in == engine.AggregateRandom 434 } 435 436 func splitAggregationsToLeftAndRight( 437 ctx *plancontext.PlanningContext, 438 aggregations []operators.Aggr, 439 join *joinGen4, 440 ) ([]*operators.Aggr, []*operators.Aggr, error) { 441 var lhsAggrs, rhsAggrs []*operators.Aggr 442 for _, aggr := range aggregations { 443 newAggr := aggr 444 if _, ok := aggr.Original.Expr.(*sqlparser.CountStar); ok { 445 lhsAggrs = append(lhsAggrs, &newAggr) 446 rhsAggrs = append(rhsAggrs, &newAggr) 447 } else { 448 deps := ctx.SemTable.RecursiveDeps(aggr.Original.Expr) 449 var other *operators.Aggr 450 // if we are sending down min/max/random, we don't have to multiply the results with anything 451 if !isMinOrMax(aggr.OpCode) && !isRandom(aggr.OpCode) { 452 other = countStarAggr() 453 } 454 switch { 455 case deps.IsSolvedBy(join.Left.ContainsTables()): 456 lhsAggrs = append(lhsAggrs, &newAggr) 457 rhsAggrs = append(rhsAggrs, other) 458 case deps.IsSolvedBy(join.Right.ContainsTables()): 459 rhsAggrs = append(rhsAggrs, &newAggr) 460 lhsAggrs = append(lhsAggrs, other) 461 default: 462 return nil, nil, vterrors.VT12001("aggregation on columns from different sources") 463 } 464 } 465 } 466 return lhsAggrs, rhsAggrs, nil 467 } 468 469 func splitGroupingsToLeftAndRight( 470 ctx *plancontext.PlanningContext, 471 join *joinGen4, 472 grouping, lhsGrouping []operators.GroupBy, 473 ) ([]operators.GroupBy, []operators.GroupBy, []int, error) { 474 var rhsGrouping []operators.GroupBy 475 476 lhsTS := join.Left.ContainsTables() 477 rhsTS := join.Right.ContainsTables() 478 // here we store information about which side the grouping value is coming from. 479 // Negative values from the left operator and positive values are offsets into the RHS 480 var groupingOffsets []int 481 for _, groupBy := range grouping { 482 deps := ctx.SemTable.RecursiveDeps(groupBy.Inner) 483 switch { 484 case deps.IsSolvedBy(lhsTS): 485 groupingOffsets = append(groupingOffsets, -(len(lhsGrouping) + 1)) 486 lhsGrouping = append(lhsGrouping, groupBy) 487 case deps.IsSolvedBy(rhsTS): 488 groupingOffsets = append(groupingOffsets, len(rhsGrouping)+1) 489 rhsGrouping = append(rhsGrouping, groupBy) 490 default: 491 return nil, nil, nil, vterrors.VT12001("grouping on columns from different sources") 492 } 493 } 494 return lhsGrouping, rhsGrouping, groupingOffsets, nil 495 } 496 497 type ( 498 reorgFunc = func(groupByOffsets []offsets, aggrOffsets [][]offsets) ([]offsets, [][]offsets) 499 sortedIterator struct { 500 grouping []operators.GroupBy 501 aggregations []operators.Aggr 502 valueGB *operators.GroupBy 503 valueA *operators.Aggr 504 groupbyIdx int 505 aggrIdx int 506 } 507 ) 508 509 func (it *sortedIterator) current() (*operators.GroupBy, *operators.Aggr) { 510 return it.valueGB, it.valueA 511 } 512 513 func (it *sortedIterator) next() bool { 514 if it.aggrIdx < len(it.aggregations) && it.groupbyIdx < len(it.grouping) { 515 aggregation := it.aggregations[it.aggrIdx] 516 groupBy := it.grouping[it.groupbyIdx] 517 if operators.CompareRefInt(aggregation.Index, groupBy.InnerIndex) { 518 it.aggrIdx++ 519 it.valueA, it.valueGB = &aggregation, nil 520 return true 521 } 522 it.groupbyIdx++ 523 it.valueA, it.valueGB = nil, &groupBy 524 return true 525 } 526 527 if it.groupbyIdx < len(it.grouping) { 528 groupBy := it.grouping[it.groupbyIdx] 529 it.groupbyIdx++ 530 it.valueA, it.valueGB = nil, &groupBy 531 return true 532 } 533 if it.aggrIdx < len(it.aggregations) { 534 aggregation := it.aggregations[it.aggrIdx] 535 it.aggrIdx++ 536 it.valueA, it.valueGB = &aggregation, nil 537 return true 538 } 539 return false 540 } 541 542 func passThrough(groupByOffsets []offsets, aggrOffsets [][]offsets) ([]offsets, [][]offsets) { 543 return groupByOffsets, aggrOffsets 544 } 545 546 func sortOffsets(grouping []operators.GroupBy, aggregations []operators.Aggr) ([]operators.GroupBy, reorgFunc, *sortedIterator) { 547 originalGrouping := make([]operators.GroupBy, len(grouping)) 548 originalAggr := make([]operators.Aggr, len(aggregations)) 549 copy(originalAggr, aggregations) 550 copy(originalGrouping, grouping) 551 operators.SortAggregations(aggregations) 552 operators.SortGrouping(grouping) 553 554 reorg := func(groupByOffsets []offsets, aggrOffsets [][]offsets) ([]offsets, [][]offsets) { 555 orderedGroupingOffsets := make([]offsets, 0, len(originalGrouping)) 556 for _, og := range originalGrouping { 557 for i, g := range grouping { 558 if og.Inner == g.Inner { 559 orderedGroupingOffsets = append(orderedGroupingOffsets, groupByOffsets[i]) 560 break 561 } 562 } 563 } 564 565 orderedAggrs := make([][]offsets, 0, len(originalAggr)) 566 for _, og := range originalAggr { 567 for i, g := range aggregations { 568 if og.Original.Expr == g.Original.Expr { 569 orderedAggrs = append(orderedAggrs, aggrOffsets[i]) 570 break 571 } 572 } 573 } 574 575 return orderedGroupingOffsets, orderedAggrs 576 } 577 578 return grouping, reorg, &sortedIterator{ 579 grouping: grouping, 580 aggregations: aggregations, 581 } 582 }