github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/costed_index_scan.go

github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/costed_index_scan.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package analyzer
    16  
    17  import (
    18  	"fmt"
    19  	"sort"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	"github.com/dolthub/go-mysql-server/sql/expression"
    25  	"github.com/dolthub/go-mysql-server/sql/expression/function/spatial"
    26  	"github.com/dolthub/go-mysql-server/sql/fulltext"
    27  	"github.com/dolthub/go-mysql-server/sql/memo"
    28  	"github.com/dolthub/go-mysql-server/sql/plan"
    29  	"github.com/dolthub/go-mysql-server/sql/rowexec"
    30  	"github.com/dolthub/go-mysql-server/sql/stats"
    31  	"github.com/dolthub/go-mysql-server/sql/transform"
    32  	"github.com/dolthub/go-mysql-server/sql/types"
    33  )
    34  
    35  // costedIndexScans matches a Filter-ResolvedTable pattern, and tries to
    36  // use those filters to create a better IndexedTableAccess plan. We first
    37  // convert the filter into a format that separates index-supported and
    38  // unsupported filters, the unsupported remaining in the Filter parent.
    39  // We then attempt to construct index scans using each table index and the
    40  // set of index-supported filters. Each individual index greedily consumes
    41  // filters. We use statistical cost and functional dependencies to compare
    42  // indexScan options. Then we use metadata for the best indexScan to
    43  // (1) convert the included filters to a sql.RangeCollection needed and
    44  // then a sql.IndexLookup, and (2) collect the unused filters as a
    45  // replacement parent Filter.
    46  //
    47  // It is worth noting that AND and OR filters behave differently. An OR
    48  // filter can only be converted into an index scan if its entire child
    49  // tree can be converted into a sql.Range. An AND filter can convert a
    50  // fraction of its conjunctions into an indexScan, with the excluded
    51  // remaining in the parent filter. Much of the format conversions focus
    52  // on maintaining this invariant.
    53  func costedIndexScans(ctx *sql.Context, a *Analyzer, n sql.Node) (sql.Node, transform.TreeIdentity, error) {
    54  	return transform.Node(n, func(n sql.Node) (sql.Node, transform.TreeIdentity, error) {
    55  		filter, ok := n.(*plan.Filter)
    56  		if !ok {
    57  			return n, transform.SameTree, nil
    58  		}
    59  
    60  		var rt sql.TableNode
    61  		var aliasName string
    62  		switch n := filter.Child.(type) {
    63  		case *plan.ResolvedTable:
    64  			rt = n
    65  		case *plan.TableAlias:
    66  			rt, _ = n.Child.(sql.TableNode)
    67  			aliasName = n.Name()
    68  		}
    69  		if rt == nil {
    70  			return n, transform.SameTree, nil
    71  		}
    72  
    73  		if is, ok := rt.UnderlyingTable().(sql.IndexSearchableTable); ok && is.SkipIndexCosting() {
    74  			lookup, err := is.LookupForExpressions(ctx, expression.SplitConjunction(filter.Expression))
    75  			if err != nil {
    76  				return n, transform.SameTree, err
    77  			}
    78  			if lookup.IsEmpty() {
    79  				return n, transform.SameTree, nil
    80  			}
    81  			ret, err := plan.NewStaticIndexedAccessForTableNode(rt, lookup)
    82  			if err != nil {
    83  				return n, transform.SameTree, err
    84  			}
    85  			return plan.NewFilter(filter.Expression, ret), transform.NewTree, nil
    86  		} else if iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable); ok {
    87  			indexes, err := iat.GetIndexes(ctx)
    88  			if err != nil {
    89  				return n, transform.SameTree, err
    90  			}
    91  			ita, _, filters, err := getCostedIndexScan(ctx, a.Catalog, rt, indexes, expression.SplitConjunction(filter.Expression))
    92  			if err != nil || ita == nil {
    93  				return n, transform.SameTree, err
    94  			}
    95  			var ret sql.Node = ita
    96  			if aliasName != "" {
    97  				ret = plan.NewTableAlias(aliasName, ret)
    98  			}
    99  			// excluded from tree + not included in index scan => filter above scan
   100  			if len(filters) > 0 {
   101  				ret = plan.NewFilter(expression.JoinAnd(filters...), ret)
   102  			}
   103  			return ret, transform.NewTree, nil
   104  		}
   105  		return n, transform.SameTree, nil
   106  	})
   107  }
   108  
   109  func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.TableNode, indexes []sql.Index, filters []sql.Expression) (*plan.IndexedTableAccess, sql.Statistic, []sql.Expression, error) {
   110  	statistics, err := statsProv.GetTableStats(ctx, strings.ToLower(rt.Database().Name()), strings.ToLower(rt.Name()))
   111  	if err != nil {
   112  		return nil, nil, nil, err
   113  	}
   114  
   115  	qualToStat := make(map[sql.StatQualifier]sql.Statistic)
   116  	for _, stat := range statistics {
   117  		if prev, ok := qualToStat[stat.Qualifier()]; !ok || ok && len(stat.Columns()) > len(prev.Columns()) {
   118  			qualToStat[stat.Qualifier()] = stat
   119  		}
   120  	}
   121  
   122  	// flatten expression tree for costing
   123  	c := newIndexCoster(ctx, rt.Name())
   124  	root, leftover, imprecise := c.flatten(expression.JoinAnd(filters...))
   125  	if root == nil {
   126  		return nil, nil, nil, err
   127  	}
   128  
   129  	iat, ok := rt.UnderlyingTable().(sql.IndexAddressableTable)
   130  	if !ok {
   131  		return nil, nil, nil, err
   132  	}
   133  
   134  	// run each index through coster, save the cheapest
   135  	var dbName string
   136  	if dbTab, ok := rt.UnderlyingTable().(sql.Databaseable); ok {
   137  		dbName = strings.ToLower(dbTab.Database())
   138  	}
   139  	tableName := strings.ToLower(rt.UnderlyingTable().Name())
   140  
   141  	if len(qualToStat) > 0 {
   142  		// don't mix and match real and default stats
   143  		for _, idx := range indexes {
   144  			qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID()))
   145  			_, ok := qualToStat[qual]
   146  			if !ok {
   147  				qualToStat = nil
   148  				break
   149  			}
   150  		}
   151  	}
   152  
   153  	for _, idx := range indexes {
   154  		qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID()))
   155  		stat, ok := qualToStat[qual]
   156  		if !ok {
   157  			stat, err = uniformDistStatisticsForIndex(ctx, statsProv, iat, idx)
   158  		}
   159  		err := c.cost(root, stat, idx)
   160  		if err != nil {
   161  			return nil, nil, nil, err
   162  		}
   163  	}
   164  
   165  	if c.bestStat == nil || c.bestFilters.Empty() {
   166  		return nil, nil, nil, err
   167  	}
   168  
   169  	targetId := c.bestStat.Qualifier().Index()
   170  	var idx sql.Index
   171  	for _, i := range indexes {
   172  		if strings.EqualFold(i.ID(), targetId) {
   173  			idx = i
   174  			break
   175  		}
   176  	}
   177  	if idx == nil {
   178  		return nil, nil, nil, fmt.Errorf("tried building indexScan with unknown statistic index: %s", targetId)
   179  	}
   180  
   181  	// separate |include| and |leftover| filters
   182  	b := newIndexScanRangeBuilder(ctx, idx, c.bestFilters, imprecise, c.idToExpr)
   183  	if leftover != nil {
   184  		b.leftover = append(b.leftover, leftover)
   185  	}
   186  	ranges, err := b.buildRangeCollection(root)
   187  	if err != nil {
   188  		return nil, nil, nil, err
   189  	}
   190  
   191  	var emptyLookup bool
   192  	if len(ranges) == 0 {
   193  		emptyLookup = true
   194  	} else if len(ranges) == 1 {
   195  		emptyLookup, err = ranges[0].IsEmpty()
   196  		if err != nil {
   197  			return nil, nil, nil, err
   198  		}
   199  		allRange := true
   200  		for i, r := range ranges[0] {
   201  			_, uok := r.UpperBound.(sql.AboveAll)
   202  			_, lok := r.LowerBound.(sql.BelowNull)
   203  			allRange = allRange && uok && lok
   204  			if i == 0 && allRange {
   205  				// no prefix restriction
   206  				return nil, nil, nil, err
   207  			}
   208  		}
   209  		if allRange {
   210  			return nil, nil, nil, err
   211  		}
   212  	}
   213  
   214  	if !idx.CanSupport(ranges...) {
   215  		return nil, nil, nil, err
   216  	}
   217  
   218  	if idx.IsSpatial() && len(ranges) > 1 {
   219  		// spatials don't support disjunct ranges
   220  		return nil, nil, nil, err
   221  	}
   222  
   223  	// create ranges, lookup, ITA for best indexScan
   224  	// TODO: use FALSE filters to replace empty tables
   225  	lookup := sql.NewIndexLookup(idx, ranges, false, emptyLookup, idx.IsSpatial(), false)
   226  
   227  	var ret *plan.IndexedTableAccess
   228  	if idx.IsFullText() {
   229  		id, _ := c.bestFilters.Next(1)
   230  		ma := c.idToExpr[indexScanId(id)]
   231  		matchAgainst, ok := ma.(*expression.MatchAgainst)
   232  		if !ok {
   233  			return nil, nil, nil, fmt.Errorf("Full-Text index found in filter with unknown expression: %T", ma)
   234  		}
   235  		if matchAgainst.KeyCols.Type == fulltext.KeyType_None {
   236  			return nil, nil, nil, err
   237  		}
   238  		ret = plan.NewStaticIndexedAccessForFullTextTable(rt, lookup, &rowexec.FulltextFilterTable{
   239  			MatchAgainst: matchAgainst,
   240  			Table:        rt,
   241  		})
   242  	} else {
   243  		ret, err = plan.NewStaticIndexedAccessForTableNode(rt, lookup)
   244  		if err != nil {
   245  			return nil, nil, nil, err
   246  		}
   247  	}
   248  
   249  	var retFilters []sql.Expression
   250  	if !iat.PreciseMatch() {
   251  		// cannot drop any filters
   252  		retFilters = filters
   253  	} else if len(b.leftover) > 0 {
   254  		// excluded from tree + not included in index scan => filter above scan
   255  		retFilters = b.leftover
   256  	}
   257  
   258  	return ret, c.bestStat, retFilters, nil
   259  }
   260  
   261  func addIndexScans(m *memo.Memo) error {
   262  	return memo.DfsRel(m.Root(), func(e memo.RelExpr) error {
   263  		filter, ok := e.(*memo.Filter)
   264  		if !ok {
   265  			return nil
   266  		}
   267  
   268  		var rt sql.TableNode
   269  		var aliasName string
   270  		switch n := filter.Child.First.(type) {
   271  		case *memo.TableScan:
   272  			rt = n.Table.(sql.TableNode)
   273  		case *memo.TableAlias:
   274  			rt, ok = n.Table.Child.(sql.TableNode)
   275  			if !ok {
   276  				return nil
   277  			}
   278  			aliasName = n.Name()
   279  		default:
   280  			return nil
   281  		}
   282  
   283  		indexes := filter.Child.First.(memo.SourceRel).Indexes()
   284  
   285  		if is, ok := rt.UnderlyingTable().(sql.IndexSearchableTable); ok && is.SkipIndexCosting() {
   286  			lookup, err := is.LookupForExpressions(m.Ctx, filter.Filters)
   287  			if err != nil {
   288  				m.HandleErr(err)
   289  			}
   290  			if lookup.IsEmpty() {
   291  				return nil
   292  			}
   293  			ret, err := plan.NewStaticIndexedAccessForTableNode(rt, lookup)
   294  			if err != nil {
   295  				m.HandleErr(err)
   296  
   297  			}
   298  			// TODO add ITA to filter group
   299  			// todo memoize ITA
   300  			// we explicitly put ITA as child of filter group for this shortcut
   301  			var idx *memo.Index
   302  			for _, i := range indexes {
   303  				if i.SqlIdx().ID() == lookup.Index.ID() {
   304  					idx = i
   305  					break
   306  				}
   307  			}
   308  			itaGroup := m.MemoizeIndexScan(nil, ret, aliasName, idx, nil)
   309  			m.MemoizeFilter(filter.Group(), itaGroup, filter.Filters)
   310  		} else {
   311  			sqlIndexes := make([]sql.Index, len(indexes))
   312  			for i, idx := range indexes {
   313  				sqlIndexes[i] = idx.SqlIdx()
   314  			}
   315  			ita, stat, filters, err := getCostedIndexScan(m.Ctx, m.StatsProvider(), rt, sqlIndexes, filter.Filters)
   316  			if err != nil {
   317  				m.HandleErr(err)
   318  			}
   319  			if ita != nil {
   320  				var idx *memo.Index
   321  				for _, i := range indexes {
   322  					if ita.Index().ID() == i.SqlIdx().ID() {
   323  						idx = i
   324  						break
   325  					}
   326  				}
   327  				var itaGrp *memo.ExprGroup
   328  				if len(filters) > 0 {
   329  					// set the indexed path as best. correct for cases where
   330  					// indexScan is incompatible with best join operator
   331  					itaGrp = m.MemoizeIndexScan(nil, ita, aliasName, idx, stat)
   332  					itaGrp.Best = itaGrp.First
   333  					itaGrp.Done = true
   334  					itaGrp.HintOk = true
   335  					itaGrp.Best.SetDistinct(memo.NoDistinctOp)
   336  					fGrp := m.MemoizeFilter(filter.Group(), itaGrp, filters)
   337  					fGrp.Best = fGrp.First
   338  					fGrp.Done = true
   339  					fGrp.HintOk = true
   340  					fGrp.Best.SetDistinct(memo.NoDistinctOp)
   341  				} else {
   342  					itaGrp = m.MemoizeIndexScan(filter.Group(), ita, aliasName, idx, stat)
   343  				}
   344  			}
   345  		}
   346  		return nil
   347  	})
   348  }
   349  
   350  func newIndexCoster(ctx *sql.Context, underlyingName string) *indexCoster {
   351  	return &indexCoster{
   352  		ctx:            ctx,
   353  		i:              1,
   354  		idToExpr:       make(map[indexScanId]sql.Expression),
   355  		underlyingName: underlyingName,
   356  	}
   357  }
   358  
// indexCoster flattens a filter expression into an indexFilter tree,
// costs that tree against candidate indexes, and remembers the best
// candidate seen so far (see cost and updateBest).
type indexCoster struct {
	// ctx is the query context handed to leaf construction; i is the next
	// unassigned filter id (newIndexCoster starts it at 1).
	ctx *sql.Context
	i   indexScanId
	// idToExpr is a record of conj decomposition so we can remove duplicates later
	idToExpr map[indexScanId]sql.Expression
	// bestStat is the lowest cardinality indexScan option
	bestStat sql.Statistic
	// bestFilters is the set of conjunctions used to create bestStat
	bestFilters sql.FastIntSet
	// bestConstant are the constant best filters
	// NOTE(review): not written or read by the methods visible in this
	// part of the file — confirm it is still used.
	bestConstant sql.FastIntSet
	// prefix key of the best indexScan
	bestPrefix     int
	underlyingName string // table name passed to newLeaf when building leaves
}
   374  
   375  // cost tries to build the lowest cardinality index scan for an expression
   376  // tree rooted at |f| on the index |idx| whose statistics are represented by |stat|.
   377  func (c *indexCoster) cost(f indexFilter, stat sql.Statistic, idx sql.Index) error {
   378  	ordinals := ordinalsForStat(stat)
   379  
   380  	newStat := stat
   381  	var filters sql.FastIntSet
   382  	var prefix int
   383  	var err error
   384  	var ok bool
   385  
   386  	switch f := f.(type) {
   387  	case *iScanAnd:
   388  		newStat, filters, prefix, err = c.costIndexScanAnd(f, stat, ordinals, idx)
   389  		if err != nil {
   390  			return err
   391  		}
   392  
   393  	case *iScanOr:
   394  		newStat, ok, err = c.costIndexScanOr(f, stat, ordinals, idx)
   395  		if err != nil {
   396  			return err
   397  		}
   398  		if ok {
   399  			filters.Add(int(f.id))
   400  		}
   401  	case *iScanLeaf:
   402  		newStat, ok, prefix, err = c.costIndexScanLeaf(f, stat, ordinals, idx)
   403  		if err != nil {
   404  			return err
   405  		}
   406  		if ok {
   407  			filters.Add(int(f.id))
   408  		}
   409  	default:
   410  		panic("unreachable")
   411  	}
   412  
   413  	c.updateBest(newStat, filters, prefix)
   414  	return nil
   415  }
   416  
   417  func (c *indexCoster) updateBest(s sql.Statistic, filters sql.FastIntSet, prefix int) {
   418  	if s == nil || filters.Len() == 0 {
   419  		return
   420  	}
   421  
   422  	var update bool
   423  	defer func() {
   424  		if update {
   425  			c.bestStat = s
   426  			c.bestFilters = filters
   427  			c.bestPrefix = prefix
   428  		}
   429  	}()
   430  
   431  	if c.bestStat == nil || s.RowCount() < c.bestStat.RowCount() {
   432  		update = true
   433  		return
   434  	} else if c.bestStat.FuncDeps().HasMax1Row() {
   435  		return
   436  	} else if c.bestPrefix == 0 || prefix == 0 && c.bestPrefix != prefix {
   437  		// any prefix is better than no prefix
   438  		update = prefix > c.bestPrefix
   439  		return
   440  	} else if s.RowCount() == c.bestStat.RowCount() {
   441  		// hand rules when stats don't exist or match exactly
   442  		cmp := s.FuncDeps()
   443  		best := c.bestStat.FuncDeps()
   444  		if cmp.HasMax1Row() {
   445  			update = true
   446  			return
   447  		}
   448  
   449  		bestKey, bok := best.StrictKey()
   450  		cmpKey, cok := cmp.StrictKey()
   451  		if cok && !bok {
   452  			// prefer unique key
   453  			update = true
   454  			return
   455  		} else if bok && !cok {
   456  			// prefer unique key
   457  			return
   458  		} else if cok && bok {
   459  			// prefer shorter strict key
   460  			if cmpKey.Len() < bestKey.Len() {
   461  				update = true
   462  				return
   463  			}
   464  		}
   465  
   466  		// the one below is sketchy, this is why we need costing
   467  		// prefer unique key even if non-unique has more constants
   468  		_, bestHasLax := best.LaxKey()
   469  		_, cmpHasLax := cmp.LaxKey()
   470  		if cmp.Constants().Len() > best.Constants().Len() {
   471  			if bestHasLax && !cmpHasLax {
   472  				// keep unique key
   473  				return
   474  			}
   475  			update = true
   476  			return
   477  		} else if cmp.Constants().Len() < best.Constants().Len() {
   478  			if cmpHasLax && !bestHasLax {
   479  				// keep unique key
   480  				update = true
   481  			}
   482  			return
   483  		}
   484  
   485  		if filters.Len() > c.bestFilters.Len() {
   486  			update = true
   487  			return
   488  		}
   489  
   490  		if s.ColSet().Len()-filters.Len() < c.bestStat.ColSet().Len()-c.bestFilters.Len() {
   491  			// prefer 1 range filter over 1 column index (1 - 1 = 0)
   492  			// vs. 1 range filter over 2 column index (2 - 1 = 1)
   493  			update = true
   494  			return
   495  		}
   496  
   497  		{
   498  			// if no unique keys, prefer equality over ranges
   499  			bestConst, bestIsNull := c.getConstAndNullFilters(c.bestFilters)
   500  			cmpConst, cmpIsNull := c.getConstAndNullFilters(c.bestFilters)
   501  			if cmpConst.Len() > bestConst.Len() {
   502  				update = true
   503  				return
   504  			}
   505  			if cmpIsNull.Len() > bestIsNull.Len() {
   506  				update = true
   507  				return
   508  			}
   509  		}
   510  
   511  		{
   512  			if strings.EqualFold(s.Qualifier().Index(), "primary") {
   513  				update = true
   514  				return
   515  			} else if strings.EqualFold(c.bestStat.Qualifier().Index(), "primary") {
   516  				return
   517  			}
   518  			if strings.Compare(s.Qualifier().Index(), c.bestStat.Qualifier().Index()) < 0 {
   519  				// if they are still equal, use index name to make deterministic
   520  				update = true
   521  				return
   522  			}
   523  		}
   524  	}
   525  }
   526  
   527  func (c *indexCoster) getConstAndNullFilters(filters sql.FastIntSet) (sql.FastIntSet, sql.FastIntSet) {
   528  	var isConst sql.FastIntSet
   529  	var isNull sql.FastIntSet
   530  	for i, hasNext := filters.Next(0); hasNext; i, hasNext = filters.Next(i + 1) {
   531  		e := c.idToExpr[indexScanId(i)]
   532  		switch e.(type) {
   533  		case *expression.Equals:
   534  			isConst.Add(i)
   535  		case *expression.IsNull:
   536  			isNull.Add(i)
   537  		case *expression.NullSafeEquals:
   538  			isConst.Add(i)
   539  			isNull.Add(i)
   540  		}
   541  	}
   542  	return isConst, isNull
   543  }
   544  
   545  // flatten converts a filter into a tree of indexFilter, a format designed
   546  // to make costing index scans easier. We return the root of the new tree
   547  // and a conjunction of filters that cannot be pushed into index scans.
   548  func (c *indexCoster) flatten(e sql.Expression) (indexFilter, sql.Expression, sql.FastIntSet) {
   549  	switch e := e.(type) {
   550  	case *expression.And:
   551  		c.idToExpr[c.i] = e
   552  		newAnd := &iScanAnd{id: c.i}
   553  		c.i++
   554  		invalid, imprecise := c.flattenAnd(e, newAnd)
   555  		var leftovers []sql.Expression
   556  		for i, hasMore := invalid.Next(1); hasMore; i, hasMore = invalid.Next(i + 1) {
   557  			f, ok := c.idToExpr[indexScanId(i)]
   558  			if !ok {
   559  				panic("todo filter map not working")
   560  			}
   561  			leftovers = append(leftovers, f)
   562  		}
   563  		return newAnd, expression.JoinAnd(leftovers...), imprecise
   564  
   565  	case *expression.Or:
   566  		c.idToExpr[c.i] = e
   567  		newOr := &iScanOr{id: c.i}
   568  		c.i++
   569  		valid, imp := c.flattenOr(e, newOr)
   570  		if !valid {
   571  			return nil, e, sql.FastIntSet{}
   572  		}
   573  		var imprecise sql.FastIntSet
   574  		if imp {
   575  			imprecise.Add(int(newOr.id))
   576  		}
   577  		return newOr, nil, imprecise
   578  
   579  	default:
   580  		c.idToExpr[c.i] = e
   581  		leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName)
   582  		c.i++
   583  		if !ok {
   584  			return nil, e, sql.FastIntSet{}
   585  		}
   586  		var imprecise sql.FastIntSet
   587  		if !expression.PreciseComparison(e) {
   588  			imprecise.Add(int(leaf.id))
   589  		}
   590  		return leaf, nil, imprecise
   591  	}
   592  }
   593  
   594  // flattenAnd return two bitsets to indicate invalid index filter ids, and imprecise filter ids
   595  func (c *indexCoster) flattenAnd(e *expression.And, and *iScanAnd) (sql.FastIntSet, sql.FastIntSet) {
   596  	var invalid sql.FastIntSet
   597  	var imprecise sql.FastIntSet
   598  	for _, e := range e.Children() {
   599  		switch e := e.(type) {
   600  		case *expression.And:
   601  			c.idToExpr[c.i] = e
   602  			c.i++
   603  			inv, imp := c.flattenAnd(e, and)
   604  			invalid = invalid.Union(inv)
   605  			imprecise = invalid.Union(imp)
   606  		case *expression.Or:
   607  			c.idToExpr[c.i] = e
   608  			newOr := &iScanOr{id: c.i}
   609  			c.i++
   610  			valid, imp := c.flattenOr(e, newOr)
   611  			if !valid {
   612  				// this or is invalid
   613  				invalid.Add(int(newOr.Id()))
   614  			} else {
   615  				and.orChildren = append(and.orChildren, newOr)
   616  				if imp {
   617  					imprecise.Add(int(newOr.id))
   618  				}
   619  			}
   620  		default:
   621  			c.idToExpr[c.i] = e
   622  			leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName)
   623  			if !ok {
   624  				invalid.Add(int(c.i))
   625  			} else {
   626  				and.newLeaf(leaf)
   627  				if !expression.PreciseComparison(e) {
   628  					imprecise.Add(int(leaf.id))
   629  				}
   630  			}
   631  			// keep a ref to the invalid |e|
   632  			c.i++
   633  		}
   634  	}
   635  	return invalid, imprecise
   636  }
   637  
   638  func (c *indexCoster) flattenOr(e *expression.Or, or *iScanOr) (bool, bool) {
   639  	var imprecise bool
   640  	for _, e := range e.Children() {
   641  		switch e := e.(type) {
   642  		case *expression.And:
   643  			c.idToExpr[c.i] = e
   644  			newAnd := &iScanAnd{id: c.i}
   645  			c.i++
   646  			inv, imp := c.flattenAnd(e, newAnd)
   647  			if !inv.Empty() {
   648  				return false, false
   649  			}
   650  			or.children = append(or.children, newAnd)
   651  			imprecise = imprecise || !imp.Empty()
   652  		case *expression.Or:
   653  			c.idToExpr[c.i] = e
   654  			c.i++
   655  			ok, imp := c.flattenOr(e, or)
   656  			if !ok {
   657  				return false, false
   658  			}
   659  			imprecise = imprecise || imp
   660  		default:
   661  			c.idToExpr[c.i] = e
   662  			leaf, ok := newLeaf(c.ctx, c.i, e, c.underlyingName)
   663  			if !ok {
   664  				return false, false
   665  			} else {
   666  				c.i++
   667  				or.children = append(or.children, leaf)
   668  				if !expression.PreciseComparison(e) {
   669  					imprecise = true
   670  				}
   671  			}
   672  		}
   673  	}
   674  	return true, imprecise
   675  }
   676  
   677  func newIndexScanRangeBuilder(ctx *sql.Context, idx sql.Index, include, imprecise sql.FastIntSet, idToExpr map[indexScanId]sql.Expression) *indexScanRangeBuilder {
   678  	return &indexScanRangeBuilder{
   679  		ctx:       ctx,
   680  		idx:       idx,
   681  		include:   include,
   682  		imprecise: imprecise,
   683  		idToExpr:  idToExpr,
   684  	}
   685  }
   686  
// indexScanRangeBuilder turns an indexFilter tree into the ranges of an
// index lookup on idx, while collecting in leftover the expressions that
// have to stay in a filter above the scan.
//
// include holds the ids of the filters selected for the scan and
// imprecise the ids of filters that are also appended to leftover when
// they are used; idToExpr maps filter ids back to their expressions.
// allRanges is what Ranges() de-duplicates.
// NOTE(review): conjIb and tableName are not referenced by the methods
// visible in this part of the file — confirm they are still needed.
type indexScanRangeBuilder struct {
	ctx       *sql.Context
	idx       sql.Index
	include   sql.FastIntSet
	imprecise sql.FastIntSet
	idToExpr  map[indexScanId]sql.Expression
	conjIb    *sql.IndexBuilder
	allRanges sql.RangeCollection
	leftover  []sql.Expression
	tableName string
}
   698  
   699  // buildRangeCollection converts our representation of the best index scan
   700  // into the format that represents an index lookup, a list of sql.Range.
   701  func (b *indexScanRangeBuilder) buildRangeCollection(f indexFilter) (sql.RangeCollection, error) {
   702  	inScan := b.include.Contains(int(f.Id()))
   703  
   704  	var ranges sql.RangeCollection
   705  	var err error
   706  	switch f := f.(type) {
   707  	case *iScanAnd:
   708  		ranges, err = b.rangeBuildAnd(f, inScan)
   709  	case *iScanOr:
   710  		ranges, err = b.rangeBuildOr(f, inScan)
   711  	case *iScanLeaf:
   712  		ranges, err = b.rangeBuildLeaf(f, inScan)
   713  	default:
   714  		return nil, fmt.Errorf("unknown indexFilter type: %T", f)
   715  	}
   716  
   717  	if err != nil {
   718  		return nil, err
   719  	}
   720  	return sql.RemoveOverlappingRanges(ranges...)
   721  }
   722  
   723  func (b *indexScanRangeBuilder) Ranges() (sql.RangeCollection, error) {
   724  	return sql.RemoveOverlappingRanges(b.allRanges...)
   725  }
   726  
   727  func (b *indexScanRangeBuilder) rangeBuildAnd(f *iScanAnd, inScan bool) (sql.RangeCollection, error) {
   728  	// no leftover check for AND, it's children may be included in scan
   729  	inScan = inScan || b.include.Contains(int(f.Id()))
   730  
   731  	var ret sql.RangeCollection
   732  	for _, or := range f.orChildren {
   733  		// separate range builder for each, before UNIONing
   734  		ranges, err := b.rangeBuildOr(or.(*iScanOr), inScan)
   735  		if err != nil {
   736  			return nil, err
   737  		}
   738  		if ranges == nil {
   739  			continue
   740  		}
   741  		if ret == nil {
   742  			ret = ranges
   743  			continue
   744  		}
   745  		ret, err = ret.Intersect(ranges)
   746  		if err != nil {
   747  			return nil, err
   748  		}
   749  	}
   750  
   751  	partBuilder := sql.NewIndexBuilder(b.idx)
   752  	for _, leaf := range f.leaves() {
   753  		switch leaf.Op() {
   754  		case indexScanOpSpatialEq:
   755  			ranges, err := b.rangeBuildSpatialLeaf(leaf, inScan)
   756  			if err != nil {
   757  				return nil, err
   758  			}
   759  			if ranges != nil {
   760  				ret, err = ret.Intersect(partBuilder.Ranges(b.ctx))
   761  				if err != nil {
   762  					return nil, err
   763  				}
   764  			}
   765  		case indexScanOpFulltextEq:
   766  			ranges, err := b.rangeBuildFulltextLeaf(leaf, inScan)
   767  			if err != nil {
   768  				return nil, err
   769  			}
   770  			if ranges != nil {
   771  				ret, err = ret.Intersect(partBuilder.Ranges(b.ctx))
   772  				if err != nil {
   773  					return nil, err
   774  				}
   775  			}
   776  		default:
   777  			b.rangeBuildDefaultLeaf(partBuilder, leaf, inScan)
   778  		}
   779  	}
   780  
   781  	if _, err := partBuilder.Build(b.ctx); err != nil {
   782  		return nil, err
   783  	}
   784  
   785  	if ret == nil {
   786  		return partBuilder.Ranges(b.ctx), nil
   787  	}
   788  
   789  	ret, err := ret.Intersect(partBuilder.Ranges(b.ctx))
   790  	if err != nil {
   791  		return nil, err
   792  	}
   793  
   794  	return ret, nil
   795  }
   796  
   797  func (b *indexScanRangeBuilder) rangeBuildOr(f *iScanOr, inScan bool) (sql.RangeCollection, error) {
   798  	inScan = !b.markLeftover(f, inScan)
   799  	if !inScan {
   800  		return nil, nil
   801  	}
   802  
   803  	// imprecise filters cannot be removed
   804  	b.markImprecise(f)
   805  
   806  	//todo union the or ranges
   807  	var ret sql.RangeCollection
   808  	for _, c := range f.children {
   809  		var ranges sql.RangeCollection
   810  		var err error
   811  		switch c := c.(type) {
   812  		case *iScanAnd:
   813  			ranges, err = b.rangeBuildAnd(c, inScan)
   814  		case *iScanLeaf:
   815  			ranges, err = b.rangeBuildLeaf(c, inScan)
   816  		default:
   817  			return nil, fmt.Errorf("invalid *iScanOr child: %T", c)
   818  		}
   819  		if err != nil {
   820  			return nil, err
   821  		}
   822  		ret = append(ret, ranges...)
   823  	}
   824  	return ret, nil
   825  }
   826  
   827  func (b *indexScanRangeBuilder) rangeBuildSpatialLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) {
   828  	inScan = !b.markLeftover(f, inScan)
   829  	if inScan {
   830  		// always mark leftover
   831  		b.leftover = append(b.leftover, b.idToExpr[f.Id()])
   832  	} else {
   833  		return nil, nil
   834  	}
   835  
   836  	g, ok := f.litValue.(types.GeometryValue)
   837  	if !ok {
   838  		return nil, sql.ErrInvalidGISData.New()
   839  	}
   840  	minX, minY, maxX, maxY := g.BBox()
   841  	lower := types.Point{X: minX, Y: minY}
   842  	upper := types.Point{X: maxX, Y: maxY}
   843  
   844  	return sql.RangeCollection{{{
   845  		LowerBound: sql.Below{Key: lower},
   846  		UpperBound: sql.Above{Key: upper},
   847  		Typ:        f.gf.Type(),
   848  	}}}, nil
   849  }
   850  
   851  func (b *indexScanRangeBuilder) rangeBuildFulltextLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) {
   852  	// fulltext leaf doesn't use ranges
   853  	inScan = !b.markLeftover(f, inScan)
   854  	if inScan {
   855  		// always mark leftover
   856  		b.leftover = append(b.leftover, b.idToExpr[f.Id()])
   857  	} else {
   858  		return nil, nil
   859  	}
   860  	return sql.RangeCollection{{sql.EmptyRangeColumnExpr(f.gf.Type())}}, nil
   861  }
   862  
   863  func (b *indexScanRangeBuilder) rangeBuildLeaf(f *iScanLeaf, inScan bool) (sql.RangeCollection, error) {
   864  	switch f.Op() {
   865  	case indexScanOpSpatialEq:
   866  		return b.rangeBuildSpatialLeaf(f, inScan)
   867  	case indexScanOpFulltextEq:
   868  		return b.rangeBuildFulltextLeaf(f, inScan)
   869  	default:
   870  		bb := sql.NewIndexBuilder(b.idx)
   871  		b.rangeBuildDefaultLeaf(bb, f, inScan)
   872  		if _, err := bb.Build(b.ctx); err != nil {
   873  			return nil, err
   874  		}
   875  		return bb.Ranges(b.ctx), nil
   876  	}
   877  }
   878  
   879  func (b *indexScanRangeBuilder) rangeBuildDefaultLeaf(bb *sql.IndexBuilder, f *iScanLeaf, inScan bool) {
   880  	inScan = !b.markLeftover(f, inScan)
   881  	if !inScan {
   882  		return
   883  	}
   884  
   885  	b.markImprecise(f)
   886  
   887  	name := f.normString()
   888  	switch f.Op() {
   889  	case indexScanOpEq:
   890  		bb.Equals(b.ctx, name, f.litValue)
   891  	case indexScanOpNotEq:
   892  		bb.NotEquals(b.ctx, name, f.litValue)
   893  	case indexScanOpInSet:
   894  		bb.Equals(b.ctx, name, f.setValues...)
   895  	case indexScanOpNotInSet:
   896  		for _, v := range f.setValues {
   897  			bb.NotEquals(b.ctx, name, v)
   898  		}
   899  	case indexScanOpGt:
   900  		bb.GreaterThan(b.ctx, name, f.litValue)
   901  	case indexScanOpGte:
   902  		bb.GreaterOrEqual(b.ctx, name, f.litValue)
   903  	case indexScanOpLt:
   904  		bb.LessThan(b.ctx, name, f.litValue)
   905  	case indexScanOpLte:
   906  		bb.LessOrEqual(b.ctx, name, f.litValue)
   907  	case indexScanOpIsNotNull:
   908  		bb.IsNotNull(b.ctx, name)
   909  	case indexScanOpIsNull:
   910  		bb.IsNull(b.ctx, name)
   911  	case indexScanOpNullSafeEq:
   912  		if f.litValue == nil {
   913  			bb.IsNull(b.ctx, name)
   914  		} else {
   915  			bb.Equals(b.ctx, name, f.litValue)
   916  		}
   917  	default:
   918  		panic(fmt.Sprintf("unknown indexScanOp: %d", f.Op()))
   919  	}
   920  }
   921  
   922  // markLeftover is used to check if leaf nodes and OR filters are left out
   923  // of the index lookup. We omit this check for AND filters because a portion
   924  // of their children can contribute to the scan.
   925  func (b *indexScanRangeBuilder) markLeftover(f indexFilter, inScan bool) bool {
   926  	if !inScan && !b.include.Contains(int(f.Id())) {
   927  		b.leftover = append(b.leftover, b.idToExpr[f.Id()])
   928  		return true
   929  	}
   930  	return false
   931  }
   932  
   933  func (b *indexScanRangeBuilder) markImprecise(f indexFilter) {
   934  	if b.imprecise.Contains(int(f.Id())) {
   935  		b.leftover = append(b.leftover, b.idToExpr[f.Id()])
   936  	}
   937  }
   938  
// indexFilter is a node of the tree that a filter conjunction is decomposed
// into, a format amenable for checking index prefix alignment. The
// implementations in this file are *iScanLeaf, *iScanOr and *iScanAnd.
type indexFilter interface {
	// Op returns the operator this node represents.
	Op() indexScanOp
	// Id returns the identifier assigned to this node.
	Id() indexScanId
}
   945  
// iScanLeaf is a leaf indexFilter: one operator applied to one column.
type iScanLeaf struct {
	// op is the operator of the predicate.
	op            indexScanOp
	// id identifies this leaf.
	id            indexScanId
	// gf is the column reference the predicate applies to.
	gf            *expression.GetField
	// underlying, when non-empty, replaces the qualifier in normString.
	underlying    string
	// litValue is the evaluated comparison value for single-value operators.
	litValue      interface{}
	// setValues holds the evaluated tuple members for the set operators.
	setValues     []interface{}
	// fulltextIndex is the ID of the index a fulltext leaf was created with.
	fulltextIndex string
}
   955  
   956  func (l *iScanLeaf) normString() string {
   957  	if l.underlying != "" {
   958  		return fmt.Sprintf("%s.%s", strings.ToLower(l.underlying), strings.ToLower(l.gf.Name()))
   959  	}
   960  	return strings.ToLower(l.gf.String())
   961  }
   962  
// Id returns the leaf's identifier, implementing indexFilter.
func (l *iScanLeaf) Id() indexScanId {
	return l.id
}
   966  
// Op returns the leaf's operator, implementing indexFilter.
func (l *iScanLeaf) Op() indexScanOp {
	return l.op
}
   970  
// iScanOr is an indexFilter representing a disjunction of its children.
type iScanOr struct {
	// id identifies this node.
	id       indexScanId
	// children are the disjuncts; costIndexScanOr accepts *iScanAnd and
	// *iScanLeaf children and rejects any other type with an error.
	children []indexFilter
}
   975  
// Id returns the node's identifier, implementing indexFilter.
func (o *iScanOr) Id() indexScanId {
	return o.id
}
   979  
// Op always returns indexScanOpOr, implementing indexFilter.
func (o *iScanOr) Op() indexScanOp {
	return indexScanOpOr
}
   983  
   984  func newIScanAnd(id indexScanId) *iScanAnd {
   985  	return &iScanAnd{
   986  		id: id,
   987  	}
   988  }
   989  
// iScanAnd is an indexFilter representing a conjunction of its children.
type iScanAnd struct {
	// id identifies this node.
	id           indexScanId
	// leafChildren groups the leaf children by lower-cased column name.
	leafChildren map[string][]*iScanLeaf
	// orChildren holds the non-leaf children; costIndexScanAnd asserts each
	// one to be an *iScanOr.
	orChildren   []indexFilter
	// cnt caches the result of childCnt once it is positive.
	cnt          int
}
   996  
// Op always returns indexScanOpAnd, implementing indexFilter.
func (a *iScanAnd) Op() indexScanOp {
	return indexScanOpAnd
}
  1000  
// Id returns the node's identifier, implementing indexFilter.
func (a *iScanAnd) Id() indexScanId {
	return a.id
}
  1004  
  1005  func (a *iScanAnd) newLeaf(l *iScanLeaf) {
  1006  	if a.leafChildren == nil {
  1007  		a.leafChildren = make(map[string][]*iScanLeaf)
  1008  	}
  1009  	a.leafChildren[strings.ToLower(l.gf.Name())] = append(a.leafChildren[strings.ToLower(l.gf.Name())], l)
  1010  }
  1011  
  1012  // leaves returns a list of this nodes leaf filters, sorted by id
  1013  func (a *iScanAnd) leaves() []*iScanLeaf {
  1014  	var ret []*iScanLeaf
  1015  	for _, colLeaves := range a.leafChildren {
  1016  		for _, leaf := range colLeaves {
  1017  			ret = append(ret, leaf)
  1018  		}
  1019  	}
  1020  	sort.SliceStable(ret, func(i, j int) bool {
  1021  		return ret[i].id < ret[j].id
  1022  	})
  1023  	return ret
  1024  }
  1025  
  1026  func (a *iScanAnd) childCnt() int {
  1027  	if a.cnt > 0 {
  1028  		return a.cnt
  1029  	}
  1030  	cnt := len(a.orChildren)
  1031  	for _, leaves := range a.leafChildren {
  1032  		cnt += len(leaves)
  1033  	}
  1034  	a.cnt = cnt
  1035  	return a.cnt
  1036  }
  1037  
  1038  func formatIndexFilter(f indexFilter) string {
  1039  	b := &strings.Builder{}
  1040  	formatIndexFilterRec(b, 0, f)
  1041  	return b.String()
  1042  }
  1043  
  1044  func formatIndexFilterRec(b *strings.Builder, nesting int, f indexFilter) {
  1045  	if f == nil {
  1046  		return
  1047  	}
  1048  	switch f := f.(type) {
  1049  	case *iScanAnd:
  1050  		for i := 0; i < nesting; i++ {
  1051  			b.WriteString("  ")
  1052  		}
  1053  		fmt.Fprintf(b, "(%d: and", f.Id())
  1054  		for _, leaf := range f.leaves() {
  1055  			fmt.Fprintf(b, "\n")
  1056  			formatIndexFilterRec(b, nesting+1, leaf)
  1057  		}
  1058  		for _, or := range f.orChildren {
  1059  			fmt.Fprintf(b, "\n")
  1060  			formatIndexFilterRec(b, nesting+1, or)
  1061  		}
  1062  
  1063  		fmt.Fprintf(b, ")")
  1064  
  1065  	case *iScanOr:
  1066  		for i := 0; i < nesting; i++ {
  1067  			b.WriteString("  ")
  1068  		}
  1069  		fmt.Fprintf(b, "(%d: or", f.Id())
  1070  
  1071  		for _, c := range f.children {
  1072  			fmt.Fprintf(b, "\n")
  1073  			formatIndexFilterRec(b, nesting+1, c)
  1074  		}
  1075  		fmt.Fprintf(b, ")")
  1076  
  1077  	case *iScanLeaf:
  1078  		for i := 0; i < nesting; i++ {
  1079  			b.WriteString("  ")
  1080  		}
  1081  		switch f.Op() {
  1082  		case indexScanOpIsNull, indexScanOpIsNotNull:
  1083  			fmt.Fprintf(b, "(%d: %s %s)", f.Id(), f.gf, f.Op())
  1084  		case indexScanOpInSet, indexScanOpNotInSet:
  1085  			var valStrs []string
  1086  			for _, v := range f.setValues {
  1087  				valStrs = append(valStrs, fmt.Sprintf("%v", v))
  1088  			}
  1089  			fmt.Fprintf(b, "(%d: %s %s (%s))", f.Id(), f.gf, f.Op(), strings.Join(valStrs, ", "))
  1090  		default:
  1091  			fmt.Fprintf(b, "(%d: %s %s %v)", f.Id(), f.gf, f.Op(), f.litValue)
  1092  		}
  1093  
  1094  	default:
  1095  		panic(fmt.Sprintf("unknown indexFilter type :%T", f))
  1096  	}
  1097  }
  1098  
// indexScanId is the identifier type for indexFilter nodes.
type indexScanId uint16
  1100  
  1101  func ordinalsForStat(stat sql.Statistic) map[string]int {
  1102  	ret := make(map[string]int)
  1103  	for i, c := range stat.Columns() {
  1104  		ret[strings.ToLower(c)] = i
  1105  	}
  1106  	return ret
  1107  }
  1108  
  1109  // costIndexScanAnd applies (1) series of disjunctions and (2) a set of
  1110  // conjunctions to an index represented by a statistic. We return the
  1111  // updated statistic, the subset of applicable filters, the maximum prefix
  1112  // key created by a subset of equality filters (from conjunction only),
  1113  // or an error if applicable.
  1114  func (c *indexCoster) costIndexScanAnd(filter *iScanAnd, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, sql.FastIntSet, int, error) {
  1115  	// first step finds the conjunctions that match index prefix columns.
  1116  	// we divide into eqFilters and rangeFilters
  1117  
  1118  	ret := s
  1119  	var exact sql.FastIntSet
  1120  
  1121  	if len(filter.orChildren) > 0 {
  1122  		for _, or := range filter.orChildren {
  1123  			childStat, ok, err := c.costIndexScanOr(or.(*iScanOr), s, ordinals, idx)
  1124  			if err != nil {
  1125  				return nil, sql.FastIntSet{}, 0, err
  1126  			}
  1127  			// if valid, INTERSECT
  1128  			if ok {
  1129  				ret = stats.Intersect(ret, childStat)
  1130  				exact.Add(int(or.Id()))
  1131  			}
  1132  		}
  1133  	}
  1134  
  1135  	conj := newConjCollector(ret, ordinals)
  1136  	for _, c := range s.Columns() {
  1137  		if colFilters, ok := filter.leafChildren[c]; ok {
  1138  			for _, f := range colFilters {
  1139  				conj.add(f)
  1140  			}
  1141  		}
  1142  	}
  1143  
  1144  	if exact.Len()+conj.applied.Len() == filter.childCnt() {
  1145  		// matched all filters
  1146  		return conj.stat, sql.NewFastIntSet(int(filter.id)), conj.missingPrefix, nil
  1147  	}
  1148  
  1149  	return conj.stat, exact.Union(conj.applied), conj.missingPrefix, nil
  1150  }
  1151  
  1152  func (c *indexCoster) costIndexScanOr(filter *iScanOr, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, bool, error) {
  1153  	// OR just unions the statistics from each child?
  1154  	// if one of the children is invalid, we balk and return false
  1155  	// otherwise we union the buckets between the children
  1156  	ret := s
  1157  	for _, child := range filter.children {
  1158  		switch child := child.(type) {
  1159  		case *iScanAnd:
  1160  			childStat, ids, _, err := c.costIndexScanAnd(child, s, ordinals, idx)
  1161  			if err != nil {
  1162  				return nil, false, err
  1163  			}
  1164  			if ids.Len() != 1 || !ids.Contains(int(child.Id())) {
  1165  				// scan option missed some filters
  1166  				return nil, false, nil
  1167  			}
  1168  			ret = stats.Union(s, childStat)
  1169  
  1170  		case *iScanLeaf:
  1171  			var ok bool
  1172  			childStat, ok, _, err := c.costIndexScanLeaf(child, s, ordinals, idx)
  1173  			if err != nil {
  1174  				return nil, false, err
  1175  			}
  1176  			if !ok {
  1177  				return nil, false, nil
  1178  			}
  1179  			ret = stats.Union(s, childStat)
  1180  
  1181  		default:
  1182  			return nil, false, fmt.Errorf("invalid *iScanOr child: %T", child)
  1183  		}
  1184  	}
  1185  	return ret, true, nil
  1186  }
  1187  
  1188  // indexHasContentHashedFieldForFilter returns true if the given index |idx| has a content-hashed field that is used
  1189  // by the given filter |filter|. |ordinals| provides a mapping from filter expression to position in |idx|. Indexes
  1190  // with content-hashed fields can only be used for a subset of filter operations.
  1191  func indexHasContentHashedFieldForFilter(filter *iScanLeaf, idx sql.Index, ordinals map[string]int) bool {
  1192  	// Only unique indexes are currently able to use content-hashed fields
  1193  	if !idx.IsUnique() {
  1194  		return false
  1195  	}
  1196  
  1197  	i := ordinals[filter.gf.Name()]
  1198  	columnExpressionType := idx.ColumnExpressionTypes()[i]
  1199  
  1200  	// Only TEXT/BLOB types can currently use content-hashes in indexes
  1201  	if !types.IsTextBlob(columnExpressionType.Type) {
  1202  		return false
  1203  	}
  1204  
  1205  	prefixLength := uint16(0)
  1206  	if len(idx.PrefixLengths()) > i {
  1207  		prefixLength = idx.PrefixLengths()[i]
  1208  	}
  1209  	return prefixLength == 0
  1210  }
  1211  
  1212  // costIndexScanLeaf tries to apply a leaf filter to an index represented
  1213  // by a statistic, returning the updated statistic, whether the filter was
  1214  // applicable, and the maximum prefix key (0 or 1 for a leaf).
  1215  func (c *indexCoster) costIndexScanLeaf(filter *iScanLeaf, s sql.Statistic, ordinals map[string]int, idx sql.Index) (sql.Statistic, bool, int, error) {
  1216  	ord, ok := ordinals[strings.ToLower(filter.gf.Name())]
  1217  	if !ok {
  1218  		return nil, false, 0, nil
  1219  	}
  1220  
  1221  	// indexes with content-hashed fields can be used to test equality or compare with NULL,
  1222  	// but can't be used for other comparisons, such as less than or greater than.
  1223  	if indexHasContentHashedFieldForFilter(filter, idx, ordinals) {
  1224  		switch filter.op {
  1225  		case indexScanOpEq, indexScanOpNotEq, indexScanOpNullSafeEq, indexScanOpIsNull, indexScanOpIsNotNull:
  1226  		default:
  1227  			return nil, false, 0, nil
  1228  		}
  1229  	}
  1230  
  1231  	switch filter.op {
  1232  	case indexScanOpSpatialEq:
  1233  		stat, ok, err := c.costSpatial(filter, s, ord)
  1234  		return stat, ok, 0, err
  1235  	case indexScanOpFulltextEq:
  1236  		stat, ok, err := c.costFulltext(filter, s, ord)
  1237  		return stat, ok, 0, err
  1238  	default:
  1239  		conj := newConjCollector(s, ordinals)
  1240  		conj.add(filter)
  1241  		return conj.stat, true, conj.missingPrefix, nil
  1242  	}
  1243  }
  1244  
  1245  func (c *indexCoster) costSpatial(filter *iScanLeaf, s sql.Statistic, ordinal int) (sql.Statistic, bool, error) {
  1246  	return s, s.IndexClass() == sql.IndexClassSpatial && ordinal == 0 && filter.litValue != nil, nil
  1247  }
  1248  
  1249  func (c *indexCoster) costFulltext(filter *iScanLeaf, s sql.Statistic, ordinal int) (sql.Statistic, bool, error) {
  1250  	// check that the filter's index matches the fulltext index
  1251  	return s, s.IndexClass() == sql.IndexClassFulltext && s.Qualifier().Index() == filter.fulltextIndex, nil
  1252  }
  1253  
  1254  type indexScanOp uint8
  1255  
  1256  //go:generate stringer -type=indexScanOp -linecomment
  1257  
  1258  const (
  1259  	indexScanOpEq         indexScanOp = iota // =
  1260  	indexScanOpNullSafeEq                    // <=>
  1261  	indexScanOpInSet                         // =
  1262  	indexScanOpNotInSet                      // !=
  1263  	indexScanOpNotEq                         // !=
  1264  	indexScanOpGt                            // >
  1265  	indexScanOpGte                           // >=
  1266  	indexScanOpLt                            // <
  1267  	indexScanOpLte                           // <=
  1268  	indexScanOpAnd                           // &&
  1269  	indexScanOpOr                            // ||
  1270  	indexScanOpIsNull                        // IS NULL
  1271  	indexScanOpIsNotNull                     // IS NOT NULL
  1272  	indexScanOpSpatialEq                     // SpatialEq
  1273  	indexScanOpFulltextEq                    // FulltextEq
  1274  )
  1275  
  1276  // swap returns the identity op for swapping a comparison's LHS and RHS
  1277  func (o indexScanOp) swap() indexScanOp {
  1278  	switch o {
  1279  	case indexScanOpGt:
  1280  		return indexScanOpLt
  1281  	case indexScanOpGte:
  1282  		return indexScanOpLte
  1283  	case indexScanOpLt:
  1284  		return indexScanOpGt
  1285  	case indexScanOpLte:
  1286  		return indexScanOpGte
  1287  	default:
  1288  		return o
  1289  	}
  1290  }
  1291  
  1292  func newLeaf(ctx *sql.Context, id indexScanId, e sql.Expression, underlying string) (*iScanLeaf, bool) {
  1293  	var op indexScanOp
  1294  	var left sql.Expression
  1295  	var right sql.Expression
  1296  	switch e := e.(type) {
  1297  	case *expression.NullSafeEquals:
  1298  		op = indexScanOpNullSafeEq
  1299  		right = e.Right()
  1300  		left = e.Left()
  1301  	case *expression.Equals:
  1302  		op = indexScanOpEq
  1303  		right = e.Right()
  1304  		left = e.Left()
  1305  	case *expression.InTuple:
  1306  		op = indexScanOpInSet
  1307  		right = e.Right()
  1308  		left = e.Left()
  1309  	case *expression.HashInTuple:
  1310  		op = indexScanOpInSet
  1311  		right = e.Right()
  1312  		left = e.Left()
  1313  	case *expression.LessThan:
  1314  		left = e.Left()
  1315  		right = e.Right()
  1316  		op = indexScanOpLt
  1317  	case *expression.GreaterThanOrEqual:
  1318  		left = e.Left()
  1319  		right = e.Right()
  1320  		op = indexScanOpGte
  1321  	case *expression.GreaterThan:
  1322  		left = e.Left()
  1323  		right = e.Right()
  1324  		op = indexScanOpGt
  1325  	case *expression.LessThanOrEqual:
  1326  		left = e.Left()
  1327  		right = e.Right()
  1328  		op = indexScanOpLte
  1329  	case *expression.IsNull:
  1330  		left = e.Child
  1331  		op = indexScanOpIsNull
  1332  	case *expression.Not:
  1333  		switch e := e.Child.(type) {
  1334  		case *expression.IsNull:
  1335  			left = e.Child
  1336  			op = indexScanOpIsNotNull
  1337  		case *expression.Equals:
  1338  			left = e.Left()
  1339  			right = e.Right()
  1340  			op = indexScanOpNotEq
  1341  		case *expression.InTuple:
  1342  			op = indexScanOpNotInSet
  1343  			right = e.Right()
  1344  			left = e.Left()
  1345  		case *expression.HashInTuple:
  1346  			op = indexScanOpNotInSet
  1347  			right = e.Right()
  1348  			left = e.Left()
  1349  		default:
  1350  			return nil, false
  1351  		}
  1352  	case *spatial.Intersects, *spatial.Within, *spatial.STEquals:
  1353  		op = indexScanOpSpatialEq
  1354  		children := e.Children()
  1355  		left = children[0]
  1356  		right = children[1]
  1357  	case *expression.MatchAgainst:
  1358  		op = indexScanOpFulltextEq
  1359  		return &iScanLeaf{id: id, op: op, gf: e.Columns[0].(*expression.GetField), underlying: underlying, fulltextIndex: e.GetIndex().ID()}, true
  1360  	default:
  1361  		return nil, false
  1362  	}
  1363  
  1364  	if _, ok := left.(*expression.GetField); !ok {
  1365  		left, right = right, left
  1366  		op = op.swap()
  1367  	}
  1368  
  1369  	gf, ok := left.(*expression.GetField)
  1370  	if !ok {
  1371  		return nil, false
  1372  	}
  1373  
  1374  	if op == indexScanOpIsNull || op == indexScanOpIsNotNull {
  1375  		return &iScanLeaf{id: id, gf: gf, op: op, underlying: underlying}, true
  1376  	}
  1377  
  1378  	if !isEvaluable(right) {
  1379  		return nil, false
  1380  	}
  1381  
  1382  	if op == indexScanOpInSet || op == indexScanOpNotInSet {
  1383  		tup := right.(expression.Tuple)
  1384  		var litSet []interface{}
  1385  		for _, lit := range tup {
  1386  			value, err := lit.Eval(ctx, nil)
  1387  			if err != nil {
  1388  				return nil, false
  1389  			}
  1390  			litSet = append(litSet, value)
  1391  		}
  1392  		return &iScanLeaf{id: id, gf: gf, op: op, setValues: litSet, underlying: underlying}, true
  1393  	}
  1394  
  1395  	value, err := right.Eval(ctx, nil)
  1396  	if err != nil {
  1397  		return nil, false
  1398  	}
  1399  
  1400  	return &iScanLeaf{id: id, gf: gf, op: op, litValue: value, underlying: underlying}, true
  1401  }
  1402  
// dummyNotUniqueDistinct is the fraction of the row count that
// newUniformDistStatistic uses as the distinct count of a non-unique index.
const dummyNotUniqueDistinct = .90

// dummyNotUniqueNull is the fraction of the distinct count that
// newUniformDistStatistic uses as the null count; despite the name it is
// applied to unique and non-unique indexes alike.
const dummyNotUniqueNull = .03
  1405  
  1406  func uniformDistStatisticsForIndex(ctx *sql.Context, statsProv sql.StatsProvider, iat sql.IndexAddressableTable, idx sql.Index) (sql.Statistic, error) {
  1407  	var rowCount uint64
  1408  	var avgSize uint64
  1409  
  1410  	rowCount, _ = statsProv.RowCount(ctx, idx.Database(), idx.Table())
  1411  
  1412  	if st, ok := iat.(sql.StatisticsTable); ok {
  1413  		rCnt, _, err := st.RowCount(ctx)
  1414  		if err != nil {
  1415  			return nil, err
  1416  		}
  1417  		if rowCount == 0 {
  1418  			rowCount = rCnt
  1419  		}
  1420  		if rowCount > 0 {
  1421  			dataSize, err := st.DataLength(ctx)
  1422  			if err != nil {
  1423  				return nil, err
  1424  			}
  1425  			avgSize = dataSize / rowCount
  1426  		}
  1427  	}
  1428  
  1429  	var dbName string
  1430  	if dbTable, ok := iat.(sql.Databaseable); ok {
  1431  		dbName = strings.ToLower(dbTable.Database())
  1432  	}
  1433  	tableName := strings.ToLower(iat.Name())
  1434  
  1435  	var sch sql.Schema
  1436  	if pkt, ok := iat.(sql.PrimaryKeyTable); ok {
  1437  		sch = pkt.PrimaryKeySchema().Schema
  1438  	} else {
  1439  		sch = iat.Schema()
  1440  	}
  1441  
  1442  	return newUniformDistStatistic(dbName, tableName, sch, idx, rowCount, avgSize)
  1443  }
  1444  
  1445  func indexFds(tableName string, sch sql.Schema, idx sql.Index) (*sql.FuncDepSet, sql.ColSet, error) {
  1446  	var idxCols sql.ColSet
  1447  	pref := fmt.Sprintf("%s.", tableName)
  1448  	for _, col := range idx.ColumnExpressionTypes() {
  1449  		colName := strings.TrimPrefix(strings.ToLower(col.Expression), pref)
  1450  		i := sch.IndexOfColName(colName)
  1451  		if i < 0 {
  1452  			return nil, idxCols, fmt.Errorf("column not found on table during stats building: %s", colName)
  1453  		}
  1454  		idxCols.Add(sql.ColumnId(i + 1))
  1455  	}
  1456  
  1457  	var all sql.ColSet
  1458  	var notNull sql.ColSet
  1459  	for i, col := range sch {
  1460  		all.Add(sql.ColumnId(i + 1))
  1461  		if !col.Nullable {
  1462  			notNull.Add(sql.ColumnId(i + 1))
  1463  		}
  1464  	}
  1465  
  1466  	strict := true
  1467  	for i, hasNext := idxCols.Next(1); hasNext; i, hasNext = idxCols.Next(i + 1) {
  1468  		if !notNull.Contains(i) {
  1469  			strict = false
  1470  		}
  1471  	}
  1472  
  1473  	var strictKeys []sql.ColSet
  1474  	var laxKeys []sql.ColSet
  1475  	if !idx.IsUnique() {
  1476  		// not an FD
  1477  	} else if strict {
  1478  		strictKeys = append(strictKeys, idxCols)
  1479  	} else {
  1480  		laxKeys = append(laxKeys, idxCols)
  1481  	}
  1482  	return sql.NewTablescanFDs(all, strictKeys, laxKeys, notNull), idxCols, nil
  1483  }
  1484  
  1485  func newUniformDistStatistic(dbName, tableName string, sch sql.Schema, idx sql.Index, rowCount, avgSize uint64) (sql.Statistic, error) {
  1486  	tablePrefix := fmt.Sprintf("%s.", tableName)
  1487  
  1488  	distinctCount := rowCount
  1489  	if !idx.IsUnique() {
  1490  		distinctCount = uint64(float64(distinctCount) * dummyNotUniqueDistinct)
  1491  	}
  1492  
  1493  	nullCount := uint64(float64(distinctCount) * dummyNotUniqueNull)
  1494  
  1495  	var cols []string
  1496  	var types []sql.Type
  1497  	for _, e := range idx.ColumnExpressionTypes() {
  1498  		cols = append(cols, strings.TrimPrefix(strings.ToLower(e.Expression), tablePrefix))
  1499  		types = append(types, e.Type)
  1500  	}
  1501  
  1502  	var class sql.IndexClass
  1503  	switch {
  1504  	case idx.IsSpatial():
  1505  		class = sql.IndexClassSpatial
  1506  	case idx.IsFullText():
  1507  		class = sql.IndexClassFulltext
  1508  	default:
  1509  		class = sql.IndexClassDefault
  1510  	}
  1511  
  1512  	qual := sql.NewStatQualifier(dbName, tableName, strings.ToLower(idx.ID()))
  1513  	stat := stats.NewStatistic(rowCount, distinctCount, nullCount, avgSize, time.Now(), qual, cols, types, nil, class, nil)
  1514  
  1515  	fds, idxCols, err := indexFds(tableName, sch, idx)
  1516  	if err != nil {
  1517  		return nil, err
  1518  	}
  1519  	ret := stat.WithFuncDeps(fds)
  1520  	ret = ret.WithColSet(idxCols)
  1521  	return ret, nil
  1522  }
  1523  
  1524  func newConjCollector(s sql.Statistic, ordinals map[string]int) *conjCollector {
  1525  	return &conjCollector{
  1526  		stat:     s,
  1527  		ordinals: ordinals,
  1528  		eqVals:   make([]interface{}, len(ordinals)),
  1529  		nullable: make([]bool, len(ordinals)),
  1530  	}
  1531  }
  1532  
// conjCollector is used to stack and track changes to
// an index histogram for a list of conjunctive filters
type conjCollector struct {
	// stat is the running statistic, replaced as filters are applied.
	stat          sql.Statistic
	// ordinals maps a column name to its position in the index.
	ordinals      map[string]int
	// missingPrefix is the ordinal addEq compares against to decide whether
	// an equality extends the constant prefix; addEq advances it past every
	// consecutive constant column.
	missingPrefix int
	// constant holds ordinal+1 for every column fixed by an equality.
	constant      sql.FastIntSet
	// eqVals holds the equality value recorded for each ordinal.
	eqVals        []interface{}
	// nullable records, per ordinal, whether the equality was null-safe.
	nullable      []bool
	// applied holds the ids of every filter passed to add.
	applied       sql.FastIntSet
	// isFalse is set when a column receives two different equality values.
	isFalse       bool
}
  1545  
  1546  func (c *conjCollector) add(f *iScanLeaf) error {
  1547  	c.applied.Add(int(f.Id()))
  1548  	var err error
  1549  	switch f.Op() {
  1550  	case indexScanOpNullSafeEq:
  1551  		err = c.addEq(f.gf.Name(), f.litValue, true)
  1552  	case indexScanOpEq:
  1553  		err = c.addEq(f.gf.Name(), f.litValue, false)
  1554  	case indexScanOpInSet:
  1555  		// TODO cost UNION of equals
  1556  		err = c.addEq(f.gf.Name(), f.setValues[0], false)
  1557  	default:
  1558  		err = c.addIneq(f.Op(), f.gf.Name(), f.litValue)
  1559  	}
  1560  	return err
  1561  }
  1562  
  1563  func (c *conjCollector) addEq(col string, val interface{}, nullSafe bool) error {
  1564  	// make constant
  1565  	ord := c.ordinals[col]
  1566  	if c.constant.Contains(ord + 1) {
  1567  		if c.eqVals[ord] != val {
  1568  			// FALSE filter
  1569  			c.isFalse = true
  1570  			return nil
  1571  		}
  1572  		return nil
  1573  	}
  1574  
  1575  	c.constant.Add(ord + 1)
  1576  	c.eqVals[ord] = val
  1577  	c.nullable[ord] = nullSafe
  1578  
  1579  	if ord == c.missingPrefix {
  1580  		last := ord
  1581  		for next, hasNext := c.constant.Next(last + 1); hasNext && next == last+1; next, hasNext = c.constant.Next(next + 1) {
  1582  			// In first loop, next is always last+1 because we just added ord.
  1583  			// Keep iterating while consecutive bits are set, end on gap.
  1584  			last = next
  1585  		}
  1586  		c.missingPrefix = last
  1587  
  1588  		// truncate buckets
  1589  		var err error
  1590  		c.stat, err = stats.PrefixKey(c.stat, c.eqVals[:ord+1], c.nullable)
  1591  		if err != nil {
  1592  			return err
  1593  		}
  1594  	}
  1595  	return nil
  1596  }
  1597  
  1598  func (c *conjCollector) addIneq(op indexScanOp, col string, val interface{}) error {
  1599  	ord := c.ordinals[col]
  1600  	if ord > 0 {
  1601  		return nil
  1602  	}
  1603  	err := c.cmpFirstCol(op, val)
  1604  	if err != nil {
  1605  		return err
  1606  	}
  1607  	return c.truncateMcvs(ord, op, val)
  1608  }
  1609  
  1610  // cmpFirstCol checks whether we should try to range truncate the first
  1611  // column in the index
  1612  func (c *conjCollector) cmpFirstCol(op indexScanOp, val interface{}) error {
  1613  	// check if first col already constant
  1614  	// otherwise attempt to truncate histogram
  1615  	var err error
  1616  	if c.constant.Contains(1) {
  1617  		return nil
  1618  	}
  1619  	switch op {
  1620  	case indexScanOpNotEq:
  1621  		// todo notEq
  1622  		c.stat, err = stats.PrefixGt(c.stat, val)
  1623  	case indexScanOpGt:
  1624  		c.stat, err = stats.PrefixGt(c.stat, val)
  1625  	case indexScanOpGte:
  1626  		c.stat, err = stats.PrefixGte(c.stat, val)
  1627  	case indexScanOpLt:
  1628  		c.stat, err = stats.PrefixLt(c.stat, val)
  1629  	case indexScanOpLte:
  1630  		c.stat, err = stats.PrefixLte(c.stat, val)
  1631  	case indexScanOpIsNull:
  1632  		c.stat, err = stats.PrefixIsNull(c.stat)
  1633  	case indexScanOpIsNotNull:
  1634  		c.stat, err = stats.PrefixIsNotNull(c.stat)
  1635  	}
  1636  	return err
  1637  }
  1638  
  1639  func (c *conjCollector) truncateMcvs(i int, op indexScanOp, val interface{}) error {
  1640  	var err error
  1641  	switch op {
  1642  	case indexScanOpGt:
  1643  		c.stat, err = stats.McvPrefixGt(c.stat, i, val)
  1644  	case indexScanOpGte:
  1645  		c.stat, err = stats.McvPrefixGte(c.stat, i, val)
  1646  	case indexScanOpLt:
  1647  		c.stat, err = stats.McvPrefixLt(c.stat, i, val)
  1648  	case indexScanOpLte:
  1649  		c.stat, err = stats.McvPrefixLte(c.stat, i, val)
  1650  	case indexScanOpIsNull:
  1651  		c.stat, err = stats.McvPrefixIsNull(c.stat, i, val)
  1652  	case indexScanOpIsNotNull:
  1653  		c.stat, err = stats.McvPrefixIsNotNull(c.stat, i, val)
  1654  	}
  1655  	return err
  1656  }