github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/costed_index_scan.go (about) 1 // Copyright 2023 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package analyzer 16 17 import ( 18 "fmt" 19 "sort" 20 "strings" 21 "time" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/dolthub/go-mysql-server/sql/expression" 25 "github.com/dolthub/go-mysql-server/sql/expression/function/spatial" 26 "github.com/dolthub/go-mysql-server/sql/fulltext" 27 "github.com/dolthub/go-mysql-server/sql/memo" 28 "github.com/dolthub/go-mysql-server/sql/plan" 29 "github.com/dolthub/go-mysql-server/sql/rowexec" 30 "github.com/dolthub/go-mysql-server/sql/stats" 31 "github.com/dolthub/go-mysql-server/sql/transform" 32 "github.com/dolthub/go-mysql-server/sql/types" 33 ) 34 35 // costedIndexScans matches a Filter-ResolvedTable pattern, and tries to 36 // use those filters to create a better IndexedTableAccess plan. We first 37 // convert the filter into a format that separates index-supported and 38 // unsupported filters, the unsupported remaining in the Filter parent. 39 // We then attempt to construct index scans using each table index and the 40 // set of index-supported filters. Each individual index greedily consumes 41 // filters. We use statistical cost and functional dependencies to compare 42 // indexScan options. 
Then we use metadata for the best indexScan to 43 // (1) convert the included filters to a sql.RangeCollection needed and 44 // then a sql.IndexLookup, and (2) collect the unused filters as a 45 // replacement parent Filter. 46 // 47 // It is worth noting that AND and OR filters behave differently. An OR 48 // filter can only be converted into an index scan if its entire child 49 // tree can be converted into a sql.Range. An AND filter can convert a 50 // fraction of its conjunctions into an indexScan, with the excluded 51 // remaining in the parent filter. Much of the format conversions focus 52 // on maintaining this invariant. 53 func costedIndexScans(ctx *sql.Context, a *Analyzer, n sql.Node) (sql.Node, transform.TreeIdentity, error) { 54 return transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) { 55 filter, ok := n.(*plan.Filter) 56 if !ok { 57 return n, transform.SameTree, nil 58 } 59 60 var rt sql.TableNode 61 var aliasName string 62 switch n := filter.Child.(type) { 63 case *plan.ResolvedTable: 64 rt = n 65 case *plan.TableAlias: 66 rt, _ = n.Child.(sql.TableNode) 67 aliasName = n.Name() 68 } 69 if rt == nil { 70 return n, transform.SameTree, nil 71 } 72 73 if is, ok := rt.UnderlyingTable().(sql.IndexSearchableTable); ok && is.SkipIndexCosting() { 74 lookup, err := is.LookupForExpressions(ctx, expression.SplitConjunction(filter.Expression)) 75 if err != nil { 76 return n, transform.SameTree, err 77 } 78 if lookup.IsEmpty() { 79 return n, transform.SameTree, nil 80 } 81 ret, err := plan.NewStaticIndexedAccessForTableNode(rt, lookup) 82 if err != nil { 83 return n, transform.SameTree, err 84 } 85 return plan.NewFilter(filter.Expression, ret), transform.NewTree, nil 86 } else if iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable); ok { 87 indexes, err := iat.GetIndexes(ctx) 88 if err != nil { 89 return n, transform.SameTree, err 90 } 91 ita, _, filters, err := getCostedIndexScan(ctx, a.Catalog, rt, indexes, 
expression.SplitConjunction(filter.Expression)) 92 if err != nil || ita == nil { 93 return n, transform.SameTree, err 94 } 95 var ret sql.Node = ita 96 if aliasName != "" { 97 ret = plan.NewTableAlias(aliasName, ret) 98 } 99 // excluded from tree + not included in index scan => filter above scan 100 if len(filters) > 0 { 101 ret = plan.NewFilter(expression.JoinAnd(filters...), ret) 102 } 103 return ret, transform.NewTree, nil 104 } 105 return n, transform.SameTree, nil 106 }) 107 } 108 109 func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.TableNode, indexes []sql.Index, filters []sql.Expression) (*plan.IndexedTableAccess, sql.Statistic, []sql.Expression, error) { 110 statistics, err := statsProv.GetTableStats(ctx, strings.ToLower(rt.Database().Name()), strings.ToLower(rt.Name())) 111 if err != nil { 112 return nil, nil, nil, err 113 } 114 115 qualToStat := make(map[sql.StatQualifier]sql.Statistic) 116 for _, stat := range statistics { 117 if prev, ok := qualToStat[stat.Qualifier()]; !ok || ok && len(stat.Columns()) > len(prev.Columns()) { 118 qualToStat[stat.Qualifier()] = stat 119 } 120 } 121 122 // flatten expression tree for costing 123 c := newIndexCoster(ctx, rt.Name()) 124 root, leftover, imprecise := c.flatten(expression.JoinAnd(filters...)) 125 if root == nil { 126 return nil, nil, nil, err 127 } 128 129 iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable) 130 if !ok { 131 return nil, nil, nil, err 132 } 133 134 // run each index through coster, save the cheapest 135 var dbName string 136 if dbTab, ok := rt.UnderlyingTable().(sql.Databaseable); ok { 137 dbName = strings.ToLower(dbTab.Database()) 138 } 139 tableName := strings.ToLower(rt.UnderlyingTable().Name()) 140 141 if len(qualToStat) > 0 { 142 // don't mix and match real and default stats 143 for _, idx := range indexes { 144 qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) 145 _, ok := qualToStat[qual] 146 if !ok { 147 qualToStat = nil 148 
break 149 } 150 } 151 } 152 153 for _, idx := range indexes { 154 qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) 155 stat, ok := qualToStat[qual] 156 if !ok { 157 stat, err = uniformDistStatisticsForIndex(ctx, statsProv, iat, idx) 158 } 159 err := c.cost(root, stat, idx) 160 if err != nil { 161 return nil, nil, nil, err 162 } 163 } 164 165 if c.bestStat == nil || c.bestFilters.Empty() { 166 return nil, nil, nil, err 167 } 168 169 targetId := c.bestStat.Qualifier().Index() 170 var idx sql.Index 171 for _, i := range indexes { 172 if strings.EqualFold(i.ID(), targetId) { 173 idx = i 174 break 175 } 176 } 177 if idx == nil { 178 return nil, nil, nil, fmt.Errorf("tried building indexScan with unknown statistic index: %s", targetId) 179 } 180 181 // separate |include| and |leftover| filters 182 b := newIndexScanRangeBuilder(ctx, idx, c.bestFilters, imprecise, c.idToExpr) 183 if leftover != nil { 184 b.leftover = append(b.leftover, leftover) 185 } 186 ranges, err := b.buildRangeCollection(root) 187 if err != nil { 188 return nil, nil, nil, err 189 } 190 191 var emptyLookup bool 192 if len(ranges) == 0 { 193 emptyLookup = true 194 } else if len(ranges) == 1 { 195 emptyLookup, err = ranges[0].IsEmpty() 196 if err != nil { 197 return nil, nil, nil, err 198 } 199 allRange := true 200 for i, r := range ranges[0] { 201 _, uok := r.UpperBound.(sql.AboveAll) 202 _, lok := r.LowerBound.(sql.BelowNull) 203 allRange = allRange && uok && lok 204 if i == 0 && allRange { 205 // no prefix restriction 206 return nil, nil, nil, err 207 } 208 } 209 if allRange { 210 return nil, nil, nil, err 211 } 212 } 213 214 if !idx.CanSupport(ranges...) 
{ 215 return nil, nil, nil, err 216 } 217 218 if idx.IsSpatial() && len(ranges) > 1 { 219 // spatials don't support disjunct ranges 220 return nil, nil, nil, err 221 } 222 223 // create ranges, lookup, ITA for best indexScan 224 // TODO: use FALSE filters to replace empty tables 225 lookup := sql.NewIndexLookup(idx, ranges, false, emptyLookup, idx.IsSpatial(), false) 226 227 var ret *plan.IndexedTableAccess 228 if idx.IsFullText() { 229 id, _ := c.bestFilters.Next(1) 230 ma := c.idToExpr[indexScanId(id)] 231 matchAgainst, ok := ma.(*expression.MatchAgainst) 232 if !ok { 233 return nil, nil, nil, fmt.Errorf("Full-Text index found in filter with unknown expression: %T", ma) 234 } 235 if matchAgainst.KeyCols.Type == fulltext.KeyType_None { 236 return nil, nil, nil, err 237 } 238 ret = plan.NewStaticIndexedAccessForFullTextTable(rt, lookup, &rowexec.FulltextFilterTable{ 239 MatchAgainst: matchAgainst, 240 Table: rt, 241 }) 242 } else { 243 ret, err = plan.NewStaticIndexedAccessForTableNode(rt, lookup) 244 if err != nil { 245 return nil, nil, nil, err 246 } 247 } 248 249 var retFilters []sql.Expression 250 if !iat.PreciseMatch() { 251 // cannot drop any filters 252 retFilters = filters 253 } else if len(b.leftover) > 0 { 254 // excluded from tree + not included in index scan => filter above scan 255 retFilters = b.leftover 256 } 257 258 return ret, c.bestStat, retFilters, nil 259 } 260 261 func addIndexScans(m *memo.Memo) error { 262 return memo.DfsRel(m.Root(), func(e memo.RelExpr) error { 263 filter, ok := e.(*memo.Filter) 264 if !ok { 265 return nil 266 } 267 268 var rt sql.TableNode 269 var aliasName string 270 switch n := filter.Child.First.(type) { 271 case *memo.TableScan: 272 rt = n.Table.(sql.TableNode) 273 case *memo.TableAlias: 274 rt, ok = n.Table.Child.(sql.TableNode) 275 if !ok { 276 return nil 277 } 278 aliasName = n.Name() 279 default: 280 return nil 281 } 282 283 indexes := filter.Child.First.(memo.SourceRel).Indexes() 284 285 if is, ok := 
rt.UnderlyingTable().(sql.IndexSearchableTable); ok && is.SkipIndexCosting() { 286 lookup, err := is.LookupForExpressions(m.Ctx, filter.Filters) 287 if err != nil { 288 m.HandleErr(err) 289 } 290 if lookup.IsEmpty() { 291 return nil 292 } 293 ret, err := plan.NewStaticIndexedAccessForTableNode(rt, lookup) 294 if err != nil { 295 m.HandleErr(err) 296 297 } 298 // TODO add ITA to filter group 299 // todo memoize ITA 300 // we explicitly put ITA as child of filter group for this shortcut 301 var idx *memo.Index 302 for _, i := range indexes { 303 if i.SqlIdx().ID() == lookup.Index.ID() { 304 idx = i 305 break 306 } 307 } 308 itaGroup := m.MemoizeIndexScan(nil, ret, aliasName, idx, nil) 309 m.MemoizeFilter(filter.Group(), itaGroup, filter.Filters) 310 } else { 311 sqlIndexes := make([]sql.Index, len(indexes)) 312 for i, idx := range indexes { 313 sqlIndexes[i] = idx.SqlIdx() 314 } 315 ita, stat, filters, err := getCostedIndexScan(m.Ctx, m.StatsProvider(), rt, sqlIndexes, filter.Filters) 316 if err != nil { 317 m.HandleErr(err) 318 } 319 if ita != nil { 320 var idx *memo.Index 321 for _, i := range indexes { 322 if ita.Index().ID() == i.SqlIdx().ID() { 323 idx = i 324 break 325 } 326 } 327 var itaGrp *memo.ExprGroup 328 if len(filters) > 0 { 329 // set the indexed path as best. 
correct for cases where 330 // indexScan is incompatible with best join operator 331 itaGrp = m.MemoizeIndexScan(nil, ita, aliasName, idx, stat) 332 itaGrp.Best = itaGrp.First 333 itaGrp.Done = true 334 itaGrp.HintOk = true 335 itaGrp.Best.SetDistinct(memo.NoDistinctOp) 336 fGrp := m.MemoizeFilter(filter.Group(), itaGrp, filters) 337 fGrp.Best = fGrp.First 338 fGrp.Done = true 339 fGrp.HintOk = true 340 fGrp.Best.SetDistinct(memo.NoDistinctOp) 341 } else { 342 itaGrp = m.MemoizeIndexScan(filter.Group(), ita, aliasName, idx, stat) 343 } 344 } 345 } 346 return nil 347 }) 348 } 349 350 func newIndexCoster(ctx *sql.Context, underlyingName string) *indexCoster { 351 return &indexCoster{ 352 ctx: ctx, 353 i: 1, 354 idToExpr: make(map[indexScanId]sql.Expression), 355 underlyingName: underlyingName, 356 } 357 } 358 359 type indexCoster struct { 360 ctx *sql.Context 361 i indexScanId 362 // idToExpr is a record of conj decomposition so we can remove duplicates later 363 idToExpr map[indexScanId]sql.Expression 364 // bestStat is the lowest cardinality indexScan option 365 bestStat sql.Statistic 366 // bestFilters is the set of conjunctions used to create bestStat 367 bestFilters sql.FastIntSet 368 // bestConstant are the constant best filters 369 bestConstant sql.FastIntSet 370 // prefix key of the best indexScan 371 bestPrefix int 372 underlyingName string 373 } 374 375 // cost tries to build the lowest cardinality index scan for an expression 376 // tree rooted at |f| on the index |idx| whose statistics are represented by |stat|. 
377 func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) error { 378 ordinals := ordinalsForStat(stat) 379 380 newStat := stat 381 var filters sql.FastIntSet 382 var prefix int 383 var err error 384 var ok bool 385 386 switch f := f.(type) { 387 case *iScanAnd: 388 newStat, filters, prefix, err = c.costIndexScanAnd(f, stat, ordinals, idx) 389 if err != nil { 390 return err 391 } 392 393 case *iScanOr: 394 newStat, ok, err = c.costIndexScanOr(f, stat, ordinals, idx) 395 if err != nil { 396 return err 397 } 398 if ok { 399 filters.Add(int(f.id)) 400 } 401 case *iScanLeaf: 402 newStat, ok, prefix, err = c.costIndexScanLeaf(f, stat, ordinals, idx) 403 if err != nil { 404 return err 405 } 406 if ok { 407 filters.Add(int(f.id)) 408 } 409 default: 410 panic("unreachable") 411 } 412 413 c.updateBest(newStat, filters, prefix) 414 return nil 415 } 416 417 func (c *indexCoster) updateBest(s sql.Statistic, filters sql.FastIntSet, prefix int) { 418 if s == nil || filters.Len() == 0 { 419 return 420 } 421 422 var update bool 423 defer func() { 424 if update { 425 c.bestStat = s 426 c.bestFilters = filters 427 c.bestPrefix = prefix 428 } 429 }() 430 431 if c.bestStat == nil || s.RowCount() < c.bestStat.RowCount() { 432 update = true 433 return 434 } else if c.bestStat.FuncDeps().HasMax1Row() { 435 return 436 } else if c.bestPrefix == 0 || prefix == 0 && c.bestPrefix != prefix { 437 // any prefix is better than no prefix 438 update = prefix > c.bestPrefix 439 return 440 } else if s.RowCount() == c.bestStat.RowCount() { 441 // hand rules when stats don't exist or match exactly 442 cmp := s.FuncDeps() 443 best := c.bestStat.FuncDeps() 444 if cmp.HasMax1Row() { 445 update = true 446 return 447 } 448 449 bestKey, bok := best.StrictKey() 450 cmpKey, cok := cmp.StrictKey() 451 if cok && !bok { 452 // prefer unique key 453 update = true 454 return 455 } else if bok && !cok { 456 // prefer unique key 457 return 458 } else if cok && bok { 459 // prefer shorter 
strict key 460 if cmpKey.Len() < bestKey.Len() { 461 update = true 462 return 463 } 464 } 465 466 // the one below is sketchy, this is why we need costing 467 // prefer unique key even if non-unique has more constants 468 _, bestHasLax := best.LaxKey() 469 _, cmpHasLax := cmp.LaxKey() 470 if cmp.Constants().Len() > best.Constants().Len() { 471 if bestHasLax && !cmpHasLax { 472 // keep unique key 473 return 474 } 475 update = true 476 return 477 } else if cmp.Constants().Len() < best.Constants().Len() { 478 if cmpHasLax && !bestHasLax { 479 // keep unique key 480 update = true 481 } 482 return 483 } 484 485 if filters.Len() > c.bestFilters.Len() { 486 update = true 487 return 488 } 489 490 if s.ColSet().Len()-filters.Len() < c.bestStat.ColSet().Len()-c.bestFilters.Len() { 491 // prefer 1 range filter over 1 column index (1 - 1 = 0) 492 // vs. 1 range filter over 2 column index (2 - 1 = 1) 493 update = true 494 return 495 } 496 497 { 498 // if no unique keys, prefer equality over ranges 499 bestConst, bestIsNull := c.getConstAndNullFilters(c.bestFilters) 500 cmpConst, cmpIsNull := c.getConstAndNullFilters(c.bestFilters) 501 if cmpConst.Len() > bestConst.Len() { 502 update = true 503 return 504 } 505 if cmpIsNull.Len() > bestIsNull.Len() { 506 update = true 507 return 508 } 509 } 510 511 { 512 if strings.EqualFold(s.Qualifier().Index(), "primary") { 513 update = true 514 return 515 } else if strings.EqualFold(c.bestStat.Qualifier().Index(), "primary") { 516 return 517 } 518 if strings.Compare(s.Qualifier().Index(), c.bestStat.Qualifier().Index()) < 0 { 519 // if they are still equal, use index name to make deterministic 520 update = true 521 return 522 } 523 } 524 } 525 } 526 527 func (c *indexCoster) getConstAndNullFilters(filters sql.FastIntSet) (sql.FastIntSet, sql.FastIntSet) { 528 var isConst sql.FastIntSet 529 var isNull sql.FastIntSet 530 for i, hasNext := filters.Next(0); hasNext; i, hasNext = filters.Next(i + 1) { 531 e := c.idToExpr[indexScanId(i)] 532 
switch e.(type) { 533 case *expression.Equals: 534 isConst.Add(i) 535 case *expression.IsNull: 536 isNull.Add(i) 537 case *expression.NullSafeEquals: 538 isConst.Add(i) 539 isNull.Add(i) 540 } 541 } 542 return isConst, isNull 543 } 544 545 // flatten converts a filter into a tree of indexFilter, a format designed 546 // to make costing index scans easier. We return the root of the new tree 547 // and a conjunction of filters that cannot be pushed into index scans. 548 func (c *indexCoster) flatten(e sql.Expression) (indexFilter, sql.Expression, sql.FastIntSet) { 549 switch e := e.(type) { 550 case *expression.And: 551 c.idToExpr[c.i] = e 552 newAnd := &iScanAnd{id: c.i} 553 c.i++ 554 invalid, imprecise := c.flattenAnd(e, newAnd) 555 var leftovers []sql.Expression 556 for i, hasMore := invalid.Next(1); hasMore; i, hasMore = invalid.Next(i + 1) { 557 f, ok := c.idToExpr[indexScanId(i)] 558 if !ok { 559 panic("todo filter map not working") 560 } 561 leftovers = append(leftovers, f) 562 } 563 return newAnd, expression.JoinAnd(leftovers...), imprecise 564 565 case *expression.Or: 566 c.idToExpr[c.i] = e 567 newOr := &iScanOr{id: c.i} 568 c.i++ 569 valid, imp := c.flattenOr(e, newOr) 570 if !valid { 571 return nil, e, sql.FastIntSet{} 572 } 573 var imprecise sql.FastIntSet 574 if imp { 575 imprecise.Add(int(newOr.id)) 576 } 577 return newOr, nil, imprecise 578 579 default: 580 c.idToExpr[c.i] = e 581 leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName) 582 c.i++ 583 if !ok { 584 return nil, e, sql.FastIntSet{} 585 } 586 var imprecise sql.FastIntSet 587 if !expression.PreciseComparison(e) { 588 imprecise.Add(int(leaf.id)) 589 } 590 return leaf, nil, imprecise 591 } 592 } 593 594 // flattenAnd return two bitsets to indicate invalid index filter ids, and imprecise filter ids 595 func (c *indexCoster) flattenAnd(e *expression.And, and *iScanAnd) (sql.FastIntSet, sql.FastIntSet) { 596 var invalid sql.FastIntSet 597 var imprecise sql.FastIntSet 598 for _, e := range 
e.Children() { 599 switch e := e.(type) { 600 case *expression.And: 601 c.idToExpr[c.i] = e 602 c.i++ 603 inv, imp := c.flattenAnd(e, and) 604 invalid = invalid.Union(inv) 605 imprecise = invalid.Union(imp) 606 case *expression.Or: 607 c.idToExpr[c.i] = e 608 newOr := &iScanOr{id: c.i} 609 c.i++ 610 valid, imp := c.flattenOr(e, newOr) 611 if !valid { 612 // this or is invalid 613 invalid.Add(int(newOr.Id())) 614 } else { 615 and.orChildren = append(and.orChildren, newOr) 616 if imp { 617 imprecise.Add(int(newOr.id)) 618 } 619 } 620 default: 621 c.idToExpr[c.i] = e 622 leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName) 623 if !ok { 624 invalid.Add(int(c.i)) 625 } else { 626 and.newLeaf(leaf) 627 if !expression.PreciseComparison(e) { 628 imprecise.Add(int(leaf.id)) 629 } 630 } 631 // keep a ref to the invalid |e| 632 c.i++ 633 } 634 } 635 return invalid, imprecise 636 } 637 638 func (c *indexCoster) flattenOr(e *expression.Or, or *iScanOr) (bool, bool) { 639 var imprecise bool 640 for _, e := range e.Children() { 641 switch e := e.(type) { 642 case *expression.And: 643 c.idToExpr[c.i] = e 644 newAnd := &iScanAnd{id: c.i} 645 c.i++ 646 inv, imp := c.flattenAnd(e, newAnd) 647 if !inv.Empty() { 648 return false, false 649 } 650 or.children = append(or.children, newAnd) 651 imprecise = imprecise || !imp.Empty() 652 case *expression.Or: 653 c.idToExpr[c.i] = e 654 c.i++ 655 ok, imp := c.flattenOr(e, or) 656 if !ok { 657 return false, false 658 } 659 imprecise = imprecise || imp 660 default: 661 c.idToExpr[c.i] = e 662 leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName) 663 if !ok { 664 return false, false 665 } else { 666 c.i++ 667 or.children = append(or.children, leaf) 668 if !expression.PreciseComparison(e) { 669 imprecise = true 670 } 671 } 672 } 673 } 674 return true, imprecise 675 } 676 677 func newIndexScanRangeBuilder(ctx *sql.Context, idx sql.Index, include, imprecise sql.FastIntSet, idToExpr map[indexScanId]sql.Expression) *indexScanRangeBuilder { 678 return 
&indexScanRangeBuilder{ 679 ctx: ctx, 680 idx: idx, 681 include: include, 682 imprecise: imprecise, 683 idToExpr: idToExpr, 684 } 685 } 686 687 type indexScanRangeBuilder struct { 688 ctx *sql.Context 689 idx sql.Index 690 include sql.FastIntSet 691 imprecise sql.FastIntSet 692 idToExpr map[indexScanId]sql.Expression 693 conjIb *sql.IndexBuilder 694 allRanges sql.RangeCollection 695 leftover []sql.Expression 696 tableName string 697 } 698 699 // buildRangeCollection converts our representation of the best index scan 700 // into the format that represents an index lookup, a list of sql.Range. 701 func (b *indexScanRangeBuilder) buildRangeCollection(f indexFilter) (sql.RangeCollection, error) { 702 inScan := b.include.Contains(int(f.Id())) 703 704 var ranges sql.RangeCollection 705 var err error 706 switch f := f.(type) { 707 case *iScanAnd: 708 ranges, err = b.rangeBuildAnd(f, inScan) 709 case *iScanOr: 710 ranges, err = b.rangeBuildOr(f, inScan) 711 case *iScanLeaf: 712 ranges, err = b.rangeBuildLeaf(f, inScan) 713 default: 714 return nil, fmt.Errorf("unknown indexFilter type: %T", f) 715 } 716 717 if err != nil { 718 return nil, err 719 } 720 return sql.RemoveOverlappingRanges(ranges...) 721 } 722 723 func (b *indexScanRangeBuilder) Ranges() (sql.RangeCollection, error) { 724 return sql.RemoveOverlappingRanges(b.allRanges...) 
725 } 726 727 func (b *indexScanRangeBuilder) rangeBuildAnd(f *iScanAnd, inScan bool) (sql.RangeCollection, error) { 728 // no leftover check for AND, it's children may be included in scan 729 inScan = inScan || b.include.Contains(int(f.Id())) 730 731 var ret sql.RangeCollection 732 for _, or := range f.orChildren { 733 // separate range builder for each, before UNIONing 734 ranges, err := b.rangeBuildOr(or.(*iScanOr), inScan) 735 if err != nil { 736 return nil, err 737 } 738 if ranges == nil { 739 continue 740 } 741 if ret == nil { 742 ret = ranges 743 continue 744 } 745 ret, err = ret.Intersect(ranges) 746 if err != nil { 747 return nil, err 748 } 749 } 750 751 partBuilder := sql.NewIndexBuilder(b.idx) 752 for _, leaf := range f.leaves() { 753 switch leaf.Op() { 754 case indexScanOpSpatialEq: 755 ranges, err := b.rangeBuildSpatialLeaf(leaf, inScan) 756 if err != nil { 757 return nil, err 758 } 759 if ranges != nil { 760 ret, err = ret.Intersect(partBuilder.Ranges(b.ctx)) 761 if err != nil { 762 return nil, err 763 } 764 } 765 case indexScanOpFulltextEq: 766 ranges, err := b.rangeBuildFulltextLeaf(leaf, inScan) 767 if err != nil { 768 return nil, err 769 } 770 if ranges != nil { 771 ret, err = ret.Intersect(partBuilder.Ranges(b.ctx)) 772 if err != nil { 773 return nil, err 774 } 775 } 776 default: 777 b.rangeBuildDefaultLeaf(partBuilder, leaf, inScan) 778 } 779 } 780 781 if _, err := partBuilder.Build(b.ctx); err != nil { 782 return nil, err 783 } 784 785 if ret == nil { 786 return partBuilder.Ranges(b.ctx), nil 787 } 788 789 ret, err := ret.Intersect(partBuilder.Ranges(b.ctx)) 790 if err != nil { 791 return nil, err 792 } 793 794 return ret, nil 795 } 796 797 func (b *indexScanRangeBuilder) rangeBuildOr(f *iScanOr, inScan bool) (sql.RangeCollection, error) { 798 inScan = !b.markLeftover(f, inScan) 799 if !inScan { 800 return nil, nil 801 } 802 803 // imprecise filters cannot be removed 804 b.markImprecise(f) 805 806 //todo union the or ranges 807 var ret 
sql.RangeCollection 808 for _, c := range f.children { 809 var ranges sql.RangeCollection 810 var err error 811 switch c := c.(type) { 812 case *iScanAnd: 813 ranges, err = b.rangeBuildAnd(c, inScan) 814 case *iScanLeaf: 815 ranges, err = b.rangeBuildLeaf(c, inScan) 816 default: 817 return nil, fmt.Errorf("invalid *iScanOr child: %T", c) 818 } 819 if err != nil { 820 return nil, err 821 } 822 ret = append(ret, ranges...) 823 } 824 return ret, nil 825 } 826 827 func (b *indexScanRangeBuilder) rangeBuildSpatialLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) { 828 inScan = !b.markLeftover(f, inScan) 829 if inScan { 830 // always mark leftover 831 b.leftover = append(b.leftover, b.idToExpr[f.Id()]) 832 } else { 833 return nil, nil 834 } 835 836 g, ok := f.litValue.(types.GeometryValue) 837 if !ok { 838 return nil, sql.ErrInvalidGISData.New() 839 } 840 minX, minY, maxX, maxY := g.BBox() 841 lower := types.Point{X: minX, Y: minY} 842 upper := types.Point{X: maxX, Y: maxY} 843 844 return sql.RangeCollection{{{ 845 LowerBound: sql.Below{Key: lower}, 846 UpperBound: sql.Above{Key: upper}, 847 Typ: f.gf.Type(), 848 }}}, nil 849 } 850 851 func (b *indexScanRangeBuilder) rangeBuildFulltextLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) { 852 // fulltext leaf doesn't use ranges 853 inScan = !b.markLeftover(f, inScan) 854 if inScan { 855 // always mark leftover 856 b.leftover = append(b.leftover, b.idToExpr[f.Id()]) 857 } else { 858 return nil, nil 859 } 860 return sql.RangeCollection{{sql.EmptyRangeColumnExpr(f.gf.Type())}}, nil 861 } 862 863 func (b *indexScanRangeBuilder) rangeBuildLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) { 864 switch f.Op() { 865 case indexScanOpSpatialEq: 866 return b.rangeBuildSpatialLeaf(f, inScan) 867 case indexScanOpFulltextEq: 868 return b.rangeBuildFulltextLeaf(f, inScan) 869 default: 870 bb := sql.NewIndexBuilder(b.idx) 871 b.rangeBuildDefaultLeaf(bb, f, inScan) 872 if _, err := bb.Build(b.ctx); err != 
nil { 873 return nil, err 874 } 875 return bb.Ranges(b.ctx), nil 876 } 877 } 878 879 func (b *indexScanRangeBuilder) rangeBuildDefaultLeaf(bb *sql.IndexBuilder, f *iScanLeaf, inScan bool) { 880 inScan = !b.markLeftover(f, inScan) 881 if !inScan { 882 return 883 } 884 885 b.markImprecise(f) 886 887 name := f.normString() 888 switch f.Op() { 889 case indexScanOpEq: 890 bb.Equals(b.ctx, name, f.litValue) 891 case indexScanOpNotEq: 892 bb.NotEquals(b.ctx, name, f.litValue) 893 case indexScanOpInSet: 894 bb.Equals(b.ctx, name, f.setValues...) 895 case indexScanOpNotInSet: 896 for _, v := range f.setValues { 897 bb.NotEquals(b.ctx, name, v) 898 } 899 case indexScanOpGt: 900 bb.GreaterThan(b.ctx, name, f.litValue) 901 case indexScanOpGte: 902 bb.GreaterOrEqual(b.ctx, name, f.litValue) 903 case indexScanOpLt: 904 bb.LessThan(b.ctx, name, f.litValue) 905 case indexScanOpLte: 906 bb.LessOrEqual(b.ctx, name, f.litValue) 907 case indexScanOpIsNotNull: 908 bb.IsNotNull(b.ctx, name) 909 case indexScanOpIsNull: 910 bb.IsNull(b.ctx, name) 911 case indexScanOpNullSafeEq: 912 if f.litValue == nil { 913 bb.IsNull(b.ctx, name) 914 } else { 915 bb.Equals(b.ctx, name, f.litValue) 916 } 917 default: 918 panic(fmt.Sprintf("unknown indexScanOp: %d", f.Op())) 919 } 920 } 921 922 // markLeftover is used to check if leaf nodes and OR filters are left out 923 // of the index lookup. We omit this check for AND filters because a portion 924 // of their children can contribute to the scan. 
925 func (b *indexScanRangeBuilder) markLeftover(f indexFilter, inScan bool) bool { 926 if !inScan && !b.include.Contains(int(f.Id())) { 927 b.leftover = append(b.leftover, b.idToExpr[f.Id()]) 928 return true 929 } 930 return false 931 } 932 933 func (b *indexScanRangeBuilder) markImprecise(f indexFilter) { 934 if b.imprecise.Contains(int(f.Id())) { 935 b.leftover = append(b.leftover, b.idToExpr[f.Id()]) 936 } 937 } 938 939 // indexFilter decomposes filter conjunction into a format 940 // amenable for checking index prefix alignment 941 type indexFilter interface { 942 Op() indexScanOp 943 Id() indexScanId 944 } 945 946 type iScanLeaf struct { 947 op indexScanOp 948 id indexScanId 949 gf *expression.GetField 950 underlying string 951 litValue interface{} 952 setValues []interface{} 953 fulltextIndex string 954 } 955 956 func (l *iScanLeaf) normString() string { 957 if l.underlying != "" { 958 return fmt.Sprintf("%s.%s", strings.ToLower(l.underlying), strings.ToLower(l.gf.Name())) 959 } 960 return strings.ToLower(l.gf.String()) 961 } 962 963 func (l *iScanLeaf) Id() indexScanId { 964 return l.id 965 } 966 967 func (l *iScanLeaf) Op() indexScanOp { 968 return l.op 969 } 970 971 type iScanOr struct { 972 id indexScanId 973 children []indexFilter 974 } 975 976 func (o *iScanOr) Id() indexScanId { 977 return o.id 978 } 979 980 func (o *iScanOr) Op() indexScanOp { 981 return indexScanOpOr 982 } 983 984 func newIScanAnd(id indexScanId) *iScanAnd { 985 return &iScanAnd{ 986 id: id, 987 } 988 } 989 990 type iScanAnd struct { 991 id indexScanId 992 leafChildren map[string][]*iScanLeaf 993 orChildren []indexFilter 994 cnt int 995 } 996 997 func (a *iScanAnd) Op() indexScanOp { 998 return indexScanOpAnd 999 } 1000 1001 func (a *iScanAnd) Id() indexScanId { 1002 return a.id 1003 } 1004 1005 func (a *iScanAnd) newLeaf(l *iScanLeaf) { 1006 if a.leafChildren == nil { 1007 a.leafChildren = make(map[string][]*iScanLeaf) 1008 } 1009 a.leafChildren[strings.ToLower(l.gf.Name())] = 
append(a.leafChildren[strings.ToLower(l.gf.Name())], l)
}

// leaves returns a list of this node's leaf filters, sorted by id.
func (a *iScanAnd) leaves() []*iScanLeaf {
	var ret []*iScanLeaf
	for _, colLeaves := range a.leafChildren {
		ret = append(ret, colLeaves...)
	}
	// Go map iteration order is unspecified; sorting by id makes the
	// returned order deterministic.
	sort.SliceStable(ret, func(i, j int) bool {
		return ret[i].id < ret[j].id
	})
	return ret
}

// childCnt returns the number of direct children of this node: the OR
// children plus every leaf filter. A non-zero count is cached in a.cnt
// and returned directly on later calls.
func (a *iScanAnd) childCnt() int {
	if a.cnt > 0 {
		return a.cnt
	}
	cnt := len(a.orChildren)
	for _, leaves := range a.leafChildren {
		cnt += len(leaves)
	}
	a.cnt = cnt
	return a.cnt
}

// formatIndexFilter renders the filter tree rooted at |f| as a string.
func formatIndexFilter(f indexFilter) string {
	b := &strings.Builder{}
	formatIndexFilterRec(b, 0, f)
	return b.String()
}

// formatIndexFilterRec writes |f| into |b|, prefixing the node with one
// indentation unit per |nesting| level and recursing into children with
// nesting+1. It panics on an indexFilter implementation it does not know.
func formatIndexFilterRec(b *strings.Builder, nesting int, f indexFilter) {
	if f == nil {
		return
	}
	indent := func() {
		for i := 0; i < nesting; i++ {
			b.WriteString(" ")
		}
	}
	switch f := f.(type) {
	case *iScanAnd:
		indent()
		fmt.Fprintf(b, "(%d: and", f.Id())
		// leaves are printed before OR children
		for _, leaf := range f.leaves() {
			b.WriteString("\n")
			formatIndexFilterRec(b, nesting+1, leaf)
		}
		for _, or := range f.orChildren {
			b.WriteString("\n")
			formatIndexFilterRec(b, nesting+1, or)
		}
		b.WriteString(")")

	case *iScanOr:
		indent()
		fmt.Fprintf(b, "(%d: or", f.Id())
		for _, c := range f.children {
			b.WriteString("\n")
			formatIndexFilterRec(b, nesting+1, c)
		}
		b.WriteString(")")

	case *iScanLeaf:
		indent()
		switch f.Op() {
		case indexScanOpIsNull, indexScanOpIsNotNull:
			// unary operators carry no literal
			fmt.Fprintf(b, "(%d: %s %s)", f.Id(), f.gf, f.Op())
		case indexScanOpInSet, indexScanOpNotInSet:
			var valStrs []string
			for _, v := range f.setValues {
				valStrs = append(valStrs, fmt.Sprintf("%v", v))
			}
			fmt.Fprintf(b, "(%d: %s %s (%s))", f.Id(), f.gf, f.Op(), strings.Join(valStrs, ", "))
		default:
			fmt.Fprintf(b, "(%d: %s %s %v)", f.Id(), f.gf, f.Op(), f.litValue)
		}

	default:
		panic(fmt.Sprintf("unknown indexFilter type :%T", f))
	}
}

// indexScanId identifies a node in an index filter tree.
type indexScanId uint16

// ordinalsForStat maps each lower-cased column name of |stat| to its
// position in stat.Columns().
func ordinalsForStat(stat sql.Statistic) map[string]int {
	ret := make(map[string]int)
	for i, c := range stat.Columns() {
		ret[strings.ToLower(c)] = i
	}
	return ret
}

// costIndexScanAnd applies (1) series of disjunctions and (2) a set of
// conjunctions to an index represented by a statistic. We return the
// updated statistic, the subset of applicable filters, the maximum prefix
// key created by a subset of equality filters (from conjunction only),
// or an error if applicable.
//
// When every child of |filter| was applied, the returned set holds only
// the id of |filter| itself; otherwise it holds the ids of the individual
// children that were applied.
func (c *indexCoster) costIndexScanAnd(filter *iScanAnd, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, sql.FastIntSet, int, error) {
	ret := s
	var exact sql.FastIntSet

	// OR children are all-or-nothing: only a fully supported disjunction
	// narrows the statistic.
	for _, or := range filter.orChildren {
		childStat, ok, err := c.costIndexScanOr(or.(*iScanOr), s, ordinals, idx)
		if err != nil {
			return nil, sql.FastIntSet{}, 0, err
		}
		// if valid, INTERSECT
		if ok {
			ret = stats.Intersect(ret, childStat)
			exact.Add(int(or.Id()))
		}
	}

	// Collect the leaf filters in index column order. leafChildren is
	// keyed by lower-cased column name, so normalize the statistic's
	// column names before the lookup.
	conj := newConjCollector(ret, ordinals)
	for _, col := range s.Columns() {
		colFilters, ok := filter.leafChildren[strings.ToLower(col)]
		if !ok {
			continue
		}
		for _, f := range colFilters {
			if err := conj.add(f); err != nil {
				return nil, sql.FastIntSet{}, 0, err
			}
		}
	}

	if exact.Len()+conj.applied.Len() == filter.childCnt() {
		// matched all filters
		return conj.stat, sql.NewFastIntSet(int(filter.id)), conj.missingPrefix, nil
	}

	return conj.stat, exact.Union(conj.applied), conj.missingPrefix, nil
}

// costIndexScanOr costs a disjunction against the index described by |s|.
// The boolean result is false when any child cannot be fully applied to
// the index, in which case the disjunction as a whole is not usable.
func (c *indexCoster) costIndexScanOr(filter *iScanOr, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, bool, error) {
	// OR just unions the statistics from each child?
	// if one of the children is invalid, we balk and return false
	// otherwise we union the buckets between the children
	//
	// NOTE(review): every iteration assigns Union(s, childStat) to ret, so
	// only the last child contributes to the result; confirm whether the
	// union was meant to accumulate across children.
	ret := s
	for _, child := range filter.children {
		switch child := child.(type) {
		case *iScanAnd:
			childStat, ids, _, err := c.costIndexScanAnd(child, s, ordinals, idx)
			if err != nil {
				return nil, false, err
			}
			if ids.Len() != 1 || !ids.Contains(int(child.Id())) {
				// scan option missed some filters
				return nil, false, nil
			}
			ret = stats.Union(s, childStat)

		case *iScanLeaf:
			childStat, ok, _, err := c.costIndexScanLeaf(child, s, ordinals, idx)
			if err != nil {
				return nil, false, err
			}
			if !ok {
				return nil, false, nil
			}
			ret = stats.Union(s, childStat)

		default:
			return nil, false, fmt.Errorf("invalid *iScanOr child: %T", child)
		}
	}
	return ret, true, nil
}

// indexHasContentHashedFieldForFilter returns true if the given index |idx| has a content-hashed field that is used
// by the given filter |filter|. |ordinals| provides a mapping from filter expression to position in |idx|. Indexes
// with content-hashed fields can only be used for a subset of filter operations.
1191 func indexHasContentHashedFieldForFilter(filter *iScanLeaf, idx sql.Index, ordinals map[string]int) bool { 1192 // Only unique indexes are currently able to use content-hashed fields 1193 if !idx.IsUnique() { 1194 return false 1195 } 1196 1197 i := ordinals[filter.gf.Name()] 1198 columnExpressionType := idx.ColumnExpressionTypes()[i] 1199 1200 // Only TEXT/BLOB types can currently use content-hashes in indexes 1201 if !types.IsTextBlob(columnExpressionType.Type) { 1202 return false 1203 } 1204 1205 prefixLength := uint16(0) 1206 if len(idx.PrefixLengths()) > i { 1207 prefixLength = idx.PrefixLengths()[i] 1208 } 1209 return prefixLength == 0 1210 } 1211 1212 // costIndexScanLeaf tries to apply a leaf filter to an index represented 1213 // by a statistic, returning the updated statistic, whether the filter was 1214 // applicable, and the maximum prefix key (0 or 1 for a leaf). 1215 func (c *indexCoster) costIndexScanLeaf(filter *iScanLeaf, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, bool, int, error) { 1216 ord, ok := ordinals[strings.ToLower(filter.gf.Name())] 1217 if !ok { 1218 return nil, false, 0, nil 1219 } 1220 1221 // indexes with content-hashed fields can be used to test equality or compare with NULL, 1222 // but can't be used for other comparisons, such as less than or greater than. 
1223 if indexHasContentHashedFieldForFilter(filter, idx, ordinals) { 1224 switch filter.op { 1225 case indexScanOpEq, indexScanOpNotEq, indexScanOpNullSafeEq, indexScanOpIsNull, indexScanOpIsNotNull: 1226 default: 1227 return nil, false, 0, nil 1228 } 1229 } 1230 1231 switch filter.op { 1232 case indexScanOpSpatialEq: 1233 stat, ok, err := c.costSpatial(filter, s, ord) 1234 return stat, ok, 0, err 1235 case indexScanOpFulltextEq: 1236 stat, ok, err := c.costFulltext(filter, s, ord) 1237 return stat, ok, 0, err 1238 default: 1239 conj := newConjCollector(s, ordinals) 1240 conj.add(filter) 1241 return conj.stat, true, conj.missingPrefix, nil 1242 } 1243 } 1244 1245 func (c *indexCoster) costSpatial(filter *iScanLeaf, s sql.Statistic, ordinal int) (sql.Statistic, bool, error) { 1246 return s, s.IndexClass() == sql.IndexClassSpatial && ordinal == 0 && filter.litValue != nil, nil 1247 } 1248 1249 func (c *indexCoster) costFulltext(filter *iScanLeaf, s sql.Statistic, ordinal int) (sql.Statistic, bool, error) { 1250 // check that the filter's index matches the fulltext index 1251 return s, s.IndexClass() == sql.IndexClassFulltext && s.Qualifier().Index() == filter.fulltextIndex, nil 1252 } 1253 1254 type indexScanOp uint8 1255 1256 //go:generate stringer -type=indexScanOp -linecomment 1257 1258 const ( 1259 indexScanOpEq indexScanOp = iota // = 1260 indexScanOpNullSafeEq // <=> 1261 indexScanOpInSet // = 1262 indexScanOpNotInSet // != 1263 indexScanOpNotEq // != 1264 indexScanOpGt // > 1265 indexScanOpGte // >= 1266 indexScanOpLt // < 1267 indexScanOpLte // <= 1268 indexScanOpAnd // && 1269 indexScanOpOr // || 1270 indexScanOpIsNull // IS NULL 1271 indexScanOpIsNotNull // IS NOT NULL 1272 indexScanOpSpatialEq // SpatialEq 1273 indexScanOpFulltextEq // FulltextEq 1274 ) 1275 1276 // swap returns the identity op for swapping a comparison's LHS and RHS 1277 func (o indexScanOp) swap() indexScanOp { 1278 switch o { 1279 case indexScanOpGt: 1280 return indexScanOpLt 1281 case 
indexScanOpGte: 1282 return indexScanOpLte 1283 case indexScanOpLt: 1284 return indexScanOpGt 1285 case indexScanOpLte: 1286 return indexScanOpGte 1287 default: 1288 return o 1289 } 1290 } 1291 1292 func newLeaf(ctx *sql.Context, id indexScanId, e sql.Expression, underlying string) (*iScanLeaf, bool) { 1293 var op indexScanOp 1294 var left sql.Expression 1295 var right sql.Expression 1296 switch e := e.(type) { 1297 case *expression.NullSafeEquals: 1298 op = indexScanOpNullSafeEq 1299 right = e.Right() 1300 left = e.Left() 1301 case *expression.Equals: 1302 op = indexScanOpEq 1303 right = e.Right() 1304 left = e.Left() 1305 case *expression.InTuple: 1306 op = indexScanOpInSet 1307 right = e.Right() 1308 left = e.Left() 1309 case *expression.HashInTuple: 1310 op = indexScanOpInSet 1311 right = e.Right() 1312 left = e.Left() 1313 case *expression.LessThan: 1314 left = e.Left() 1315 right = e.Right() 1316 op = indexScanOpLt 1317 case *expression.GreaterThanOrEqual: 1318 left = e.Left() 1319 right = e.Right() 1320 op = indexScanOpGte 1321 case *expression.GreaterThan: 1322 left = e.Left() 1323 right = e.Right() 1324 op = indexScanOpGt 1325 case *expression.LessThanOrEqual: 1326 left = e.Left() 1327 right = e.Right() 1328 op = indexScanOpLte 1329 case *expression.IsNull: 1330 left = e.Child 1331 op = indexScanOpIsNull 1332 case *expression.Not: 1333 switch e := e.Child.(type) { 1334 case *expression.IsNull: 1335 left = e.Child 1336 op = indexScanOpIsNotNull 1337 case *expression.Equals: 1338 left = e.Left() 1339 right = e.Right() 1340 op = indexScanOpNotEq 1341 case *expression.InTuple: 1342 op = indexScanOpNotInSet 1343 right = e.Right() 1344 left = e.Left() 1345 case *expression.HashInTuple: 1346 op = indexScanOpNotInSet 1347 right = e.Right() 1348 left = e.Left() 1349 default: 1350 return nil, false 1351 } 1352 case *spatial.Intersects, *spatial.Within, *spatial.STEquals: 1353 op = indexScanOpSpatialEq 1354 children := e.Children() 1355 left = children[0] 1356 right 
= children[1] 1357 case *expression.MatchAgainst: 1358 op = indexScanOpFulltextEq 1359 return &iScanLeaf{id: id, op: op, gf: e.Columns[0].(*expression.GetField), underlying: underlying, fulltextIndex: e.GetIndex().ID()}, true 1360 default: 1361 return nil, false 1362 } 1363 1364 if _, ok := left.(*expression.GetField); !ok { 1365 left, right = right, left 1366 op = op.swap() 1367 } 1368 1369 gf, ok := left.(*expression.GetField) 1370 if !ok { 1371 return nil, false 1372 } 1373 1374 if op == indexScanOpIsNull || op == indexScanOpIsNotNull { 1375 return &iScanLeaf{id: id, gf: gf, op: op, underlying: underlying}, true 1376 } 1377 1378 if !isEvaluable(right) { 1379 return nil, false 1380 } 1381 1382 if op == indexScanOpInSet || op == indexScanOpNotInSet { 1383 tup := right.(expression.Tuple) 1384 var litSet []interface{} 1385 for _, lit := range tup { 1386 value, err := lit.Eval(ctx, nil) 1387 if err != nil { 1388 return nil, false 1389 } 1390 litSet = append(litSet, value) 1391 } 1392 return &iScanLeaf{id: id, gf: gf, op: op, setValues: litSet, underlying: underlying}, true 1393 } 1394 1395 value, err := right.Eval(ctx, nil) 1396 if err != nil { 1397 return nil, false 1398 } 1399 1400 return &iScanLeaf{id: id, gf: gf, op: op, litValue: value, underlying: underlying}, true 1401 } 1402 1403 const dummyNotUniqueDistinct = .90 1404 const dummyNotUniqueNull = .03 1405 1406 func uniformDistStatisticsForIndex(ctx *sql.Context, statsProv sql.StatsProvider, iat sql.IndexAddressableTable, idx sql.Index) (sql.Statistic, error) { 1407 var rowCount uint64 1408 var avgSize uint64 1409 1410 rowCount, _ = statsProv.RowCount(ctx, idx.Database(), idx.Table()) 1411 1412 if st, ok := iat.(sql.StatisticsTable); ok { 1413 rCnt, _, err := st.RowCount(ctx) 1414 if err != nil { 1415 return nil, err 1416 } 1417 if rowCount == 0 { 1418 rowCount = rCnt 1419 } 1420 if rowCount > 0 { 1421 dataSize, err := st.DataLength(ctx) 1422 if err != nil { 1423 return nil, err 1424 } 1425 avgSize = dataSize / 
rowCount 1426 } 1427 } 1428 1429 var dbName string 1430 if dbTable, ok := iat.(sql.Databaseable); ok { 1431 dbName = strings.ToLower(dbTable.Database()) 1432 } 1433 tableName := strings.ToLower(iat.Name()) 1434 1435 var sch sql.Schema 1436 if pkt, ok := iat.(sql.PrimaryKeyTable); ok { 1437 sch = pkt.PrimaryKeySchema().Schema 1438 } else { 1439 sch = iat.Schema() 1440 } 1441 1442 return newUniformDistStatistic(dbName, tableName, sch, idx, rowCount, avgSize) 1443 } 1444 1445 func indexFds(tableName string, sch sql.Schema, idx sql.Index) (*sql.FuncDepSet, sql.ColSet, error) { 1446 var idxCols sql.ColSet 1447 pref := fmt.Sprintf("%s.", tableName) 1448 for _, col := range idx.ColumnExpressionTypes() { 1449 colName := strings.TrimPrefix(strings.ToLower(col.Expression), pref) 1450 i := sch.IndexOfColName(colName) 1451 if i < 0 { 1452 return nil, idxCols, fmt.Errorf("column not found on table during stats building: %s", colName) 1453 } 1454 idxCols.Add(sql.ColumnId(i + 1)) 1455 } 1456 1457 var all sql.ColSet 1458 var notNull sql.ColSet 1459 for i, col := range sch { 1460 all.Add(sql.ColumnId(i + 1)) 1461 if !col.Nullable { 1462 notNull.Add(sql.ColumnId(i + 1)) 1463 } 1464 } 1465 1466 strict := true 1467 for i, hasNext := idxCols.Next(1); hasNext; i, hasNext = idxCols.Next(i + 1) { 1468 if !notNull.Contains(i) { 1469 strict = false 1470 } 1471 } 1472 1473 var strictKeys []sql.ColSet 1474 var laxKeys []sql.ColSet 1475 if !idx.IsUnique() { 1476 // not an FD 1477 } else if strict { 1478 strictKeys = append(strictKeys, idxCols) 1479 } else { 1480 laxKeys = append(laxKeys, idxCols) 1481 } 1482 return sql.NewTablescanFDs(all, strictKeys, laxKeys, notNull), idxCols, nil 1483 } 1484 1485 func newUniformDistStatistic(dbName, tableName string, sch sql.Schema, idx sql.Index, rowCount, avgSize uint64) (sql.Statistic, error) { 1486 tablePrefix := fmt.Sprintf("%s.", tableName) 1487 1488 distinctCount := rowCount 1489 if !idx.IsUnique() { 1490 distinctCount = uint64(float64(distinctCount) 
* dummyNotUniqueDistinct) 1491 } 1492 1493 nullCount := uint64(float64(distinctCount) * dummyNotUniqueNull) 1494 1495 var cols []string 1496 var types []sql.Type 1497 for _, e := range idx.ColumnExpressionTypes() { 1498 cols = append(cols, strings.TrimPrefix(strings.ToLower(e.Expression), tablePrefix)) 1499 types = append(types, e.Type) 1500 } 1501 1502 var class sql.IndexClass 1503 switch { 1504 case idx.IsSpatial(): 1505 class = sql.IndexClassSpatial 1506 case idx.IsFullText(): 1507 class = sql.IndexClassFulltext 1508 default: 1509 class = sql.IndexClassDefault 1510 } 1511 1512 qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID())) 1513 stat := stats.NewStatistic(rowCount, distinctCount, nullCount, avgSize, time.Now(), qual, cols, types, nil, class, nil) 1514 1515 fds, idxCols, err := indexFds(tableName, sch, idx) 1516 if err != nil { 1517 return nil, err 1518 } 1519 ret := stat.WithFuncDeps(fds) 1520 ret = ret.WithColSet(idxCols) 1521 return ret, nil 1522 } 1523 1524 func newConjCollector(s sql.Statistic, ordinals map[string]int) *conjCollector { 1525 return &conjCollector{ 1526 stat: s, 1527 ordinals: ordinals, 1528 eqVals: make([]interface{}, len(ordinals)), 1529 nullable: make([]bool, len(ordinals)), 1530 } 1531 } 1532 1533 // conjCollector is used to stack and track changes to 1534 // an index histogram for a list of conjugate filters 1535 type conjCollector struct { 1536 stat sql.Statistic 1537 ordinals map[string]int 1538 missingPrefix int 1539 constant sql.FastIntSet 1540 eqVals []interface{} 1541 nullable []bool 1542 applied sql.FastIntSet 1543 isFalse bool 1544 } 1545 1546 func (c *conjCollector) add(f *iScanLeaf) error { 1547 c.applied.Add(int(f.Id())) 1548 var err error 1549 switch f.Op() { 1550 case indexScanOpNullSafeEq: 1551 err = c.addEq(f.gf.Name(), f.litValue, true) 1552 case indexScanOpEq: 1553 err = c.addEq(f.gf.Name(), f.litValue, false) 1554 case indexScanOpInSet: 1555 // TODO cost UNION of equals 1556 err = 
c.addEq(f.gf.Name(), f.setValues[0], false) 1557 default: 1558 err = c.addIneq(f.Op(), f.gf.Name(), f.litValue) 1559 } 1560 return err 1561 } 1562 1563 func (c *conjCollector) addEq(col string, val interface{}, nullSafe bool) error { 1564 // make constant 1565 ord := c.ordinals[col] 1566 if c.constant.Contains(ord + 1) { 1567 if c.eqVals[ord] != val { 1568 // FALSE filter 1569 c.isFalse = true 1570 return nil 1571 } 1572 return nil 1573 } 1574 1575 c.constant.Add(ord + 1) 1576 c.eqVals[ord] = val 1577 c.nullable[ord] = nullSafe 1578 1579 if ord == c.missingPrefix { 1580 last := ord 1581 for next, hasNext := c.constant.Next(last + 1); hasNext && next == last+1; next, hasNext = c.constant.Next(next + 1) { 1582 // In first loop, next is always last+1 because we just added ord. 1583 // Keep iterating while consecutive bits are set, end on gap. 1584 last = next 1585 } 1586 c.missingPrefix = last 1587 1588 // truncate buckets 1589 var err error 1590 c.stat, err = stats.PrefixKey(c.stat, c.eqVals[:ord+1], c.nullable) 1591 if err != nil { 1592 return err 1593 } 1594 } 1595 return nil 1596 } 1597 1598 func (c *conjCollector) addIneq(op indexScanOp, col string, val interface{}) error { 1599 ord := c.ordinals[col] 1600 if ord > 0 { 1601 return nil 1602 } 1603 err := c.cmpFirstCol(op, val) 1604 if err != nil { 1605 return err 1606 } 1607 return c.truncateMcvs(ord, op, val) 1608 } 1609 1610 // cmpFirstCol checks whether we should try to range truncate the first 1611 // column in the index 1612 func (c *conjCollector) cmpFirstCol(op indexScanOp, val interface{}) error { 1613 // check if first col already constant 1614 // otherwise attempt to truncate histogram 1615 var err error 1616 if c.constant.Contains(1) { 1617 return nil 1618 } 1619 switch op { 1620 case indexScanOpNotEq: 1621 // todo notEq 1622 c.stat, err = stats.PrefixGt(c.stat, val) 1623 case indexScanOpGt: 1624 c.stat, err = stats.PrefixGt(c.stat, val) 1625 case indexScanOpGte: 1626 c.stat, err = 
stats.PrefixGte(c.stat, val) 1627 case indexScanOpLt: 1628 c.stat, err = stats.PrefixLt(c.stat, val) 1629 case indexScanOpLte: 1630 c.stat, err = stats.PrefixLte(c.stat, val) 1631 case indexScanOpIsNull: 1632 c.stat, err = stats.PrefixIsNull(c.stat) 1633 case indexScanOpIsNotNull: 1634 c.stat, err = stats.PrefixIsNotNull(c.stat) 1635 } 1636 return err 1637 } 1638 1639 func (c *conjCollector) truncateMcvs(i int, op indexScanOp, val interface{}) error { 1640 var err error 1641 switch op { 1642 case indexScanOpGt: 1643 c.stat, err = stats.McvPrefixGt(c.stat, i, val) 1644 case indexScanOpGte: 1645 c.stat, err = stats.McvPrefixGte(c.stat, i, val) 1646 case indexScanOpLt: 1647 c.stat, err = stats.McvPrefixLt(c.stat, i, val) 1648 case indexScanOpLte: 1649 c.stat, err = stats.McvPrefixLte(c.stat, i, val) 1650 case indexScanOpIsNull: 1651 c.stat, err = stats.McvPrefixIsNull(c.stat, i, val) 1652 case indexScanOpIsNotNull: 1653 c.stat, err = stats.McvPrefixIsNotNull(c.stat, i, val) 1654 } 1655 return err 1656 }