github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/index_analyzer.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package analyzer
    16  
    17  import (
    18  	"sort"
    19  	"strings"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql/transform"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	"github.com/dolthub/go-mysql-server/sql/plan"
    25  )
    26  
// indexAnalyzer holds the indexes that are available while analyzing a single plan node.
//
// indexesByTable maps a lower-cased table name (or table alias) to the indexes collected for it by
// newIndexAnalyzerForNode. indexRegistry is only set when the context's registry reports that it has indexes; every
// method treats a nil indexRegistry as "no registry". registryIdxes accumulates the registry indexes handed out by
// MatchingIndexes so that releaseUsedIndexes can release them afterwards.
type indexAnalyzer struct {
	// TODO: these need to be qualified by database name as well to be valid. Otherwise we can't distinguish between two
	//  tables with the same name in different databases. But right now table nodes aren't qualified by their resolved
	//  database in the plan, so we can't do this.
	indexesByTable map[string][]sql.Index
	indexRegistry  *sql.IndexRegistry
	registryIdxes  []sql.Index
}
    35  
    36  // newIndexAnalyzerForNode returns an analyzer for indexes available in the node given, keyed by the table name. These
    37  // might come from either the tables themselves natively, or else from an index driver that has indexes for the tables
    38  // included in the nodes. Indexes are keyed by the aliased name of the table, if applicable. These names must be
    39  // unaliased when matching against the names of tables in index definitions.
    40  func newIndexAnalyzerForNode(ctx *sql.Context, n sql.Node) (*indexAnalyzer, error) {
    41  	var analysisErr error
    42  	indexes := make(map[string][]sql.Index)
    43  
    44  	var indexesForTable = func(name string, table sql.Table) error {
    45  		name = strings.ToLower(name)
    46  		it, ok := table.(sql.IndexAddressableTable)
    47  
    48  		if !ok {
    49  			return nil
    50  		}
    51  
    52  		idxes, err := it.GetIndexes(ctx)
    53  		if err != nil {
    54  			return err
    55  		}
    56  
    57  		indexes[name] = append(indexes[name], idxes...)
    58  		return nil
    59  	}
    60  
    61  	// Find all of the native indexed tables in the node (those that don't require a driver)
    62  	if n != nil {
    63  		transform.Inspect(n, func(n sql.Node) bool {
    64  			switch n := n.(type) {
    65  			// Because we previously pushed filters as close to their relevant tables as possible, we know that there
    66  			// cannot be another Filter between our node and any tables with relevant indexes.
    67  			case *plan.Filter:
    68  				return false
    69  			case *plan.TableAlias:
    70  				rt, ok := n.Child.(sql.TableNode)
    71  				if !ok {
    72  					return false
    73  				}
    74  
    75  				err := indexesForTable(n.Name(), rt.UnderlyingTable())
    76  				if err != nil {
    77  					analysisErr = err
    78  					return false
    79  				}
    80  
    81  				return false
    82  			case *plan.ResolvedTable:
    83  				err := indexesForTable(n.Name(), n.UnderlyingTable())
    84  				if err != nil {
    85  					analysisErr = err
    86  					return false
    87  				}
    88  			case *plan.IndexedTableAccess:
    89  				err := indexesForTable(n.Name(), n.TableNode.UnderlyingTable())
    90  				if err != nil {
    91  					analysisErr = err
    92  					return false
    93  				}
    94  			}
    95  			return true
    96  		})
    97  	}
    98  
    99  	if analysisErr != nil {
   100  		return nil, analysisErr
   101  	}
   102  
   103  	var idxRegistry *sql.IndexRegistry
   104  	if ctx.GetIndexRegistry().HasIndexes() {
   105  		idxRegistry = ctx.GetIndexRegistry()
   106  	}
   107  
   108  	return &indexAnalyzer{
   109  		indexesByTable: indexes,
   110  		indexRegistry:  idxRegistry,
   111  	}, nil
   112  }
   113  
   114  // IndexesByTable returns all indexes on the table named. The table must be present in the node used to create the
   115  // analyzer.
   116  func (r *indexAnalyzer) IndexesByTable(ctx *sql.Context, db, table string) []sql.Index {
   117  	indexes := r.indexesByTable[strings.ToLower(table)]
   118  
   119  	if r.indexRegistry != nil {
   120  		idxes := r.indexRegistry.IndexesByTable(db, table)
   121  		for _, idx := range idxes {
   122  			indexes = append(indexes, idx)
   123  		}
   124  	}
   125  
   126  	return indexes
   127  }
   128  
   129  // MatchingIndex returns the index that best fits the given expressions. See MatchingIndexes for the rules regarding
   130  // which index is considered the best.
   131  func (r *indexAnalyzer) MatchingIndex(ctx *sql.Context, table, db string, exprs ...sql.Expression) sql.Index {
   132  	indexes := r.MatchingIndexes(ctx, table, db, exprs...)
   133  	if len(indexes) > 0 {
   134  		return indexes[0]
   135  	}
   136  	return nil
   137  }
   138  
   139  // MatchingIndexes returns a list of all matching indexes for the given expressions. The returned order of the indexes
   140  // are deterministic and follow the given rules, from the highest priority in descending order:
   141  //
   142  //  1. Expressions exactly match the index
   143  //  2. Expressions match as much of the index prefix as possible
   144  //  3. Primary Key index ordered before secondary indexes
   145  //     TODO: for rule 3, we want to prioritize "covering" indexes over non-covering indexes, but sql.Index doesn't
   146  //     provide the necessary information to evaluate this condition. Primary Key status approximates it.
   147  //  4. Largest index by expression count
   148  //  5. Index ID in ascending order
   149  //
   150  // It is worth noting that all returned indexes will have at least the first index expression satisfied (creating a
   151  // partial index), as otherwise the index would be no better than a table scan (for which integrators may have
   152  // optimizations).
   153  func (r *indexAnalyzer) MatchingIndexes(ctx *sql.Context, table, db string, exprs ...sql.Expression) []sql.Index {
   154  	// As multiple expressions may be the same, we filter out duplicates
   155  	distinctExprs := make(map[string]struct{})
   156  	var exprStrs []string
   157  	for _, e := range exprs {
   158  		es := strings.ToLower(e.String())
   159  		if _, ok := distinctExprs[es]; !ok {
   160  			distinctExprs[es] = struct{}{}
   161  			exprStrs = append(exprStrs, es)
   162  		}
   163  	}
   164  
   165  	type idxWithLen struct {
   166  		sql.Index
   167  		exprLen     int
   168  		prefixCount int
   169  	}
   170  
   171  	var indexes []idxWithLen
   172  	for _, idx := range r.indexesByTable[strings.ToLower(table)] {
   173  		indexExprs := idx.Expressions()
   174  		if ok, prefixCount := exprsAreIndexSubset(exprStrs, indexExprs); ok && prefixCount >= 1 {
   175  			indexes = append(indexes, idxWithLen{idx, len(indexExprs), prefixCount})
   176  		}
   177  	}
   178  
   179  	if r.indexRegistry != nil {
   180  		idx, prefixCount, err := r.indexRegistry.MatchingIndex(ctx, db, exprs...)
   181  		if err != nil {
   182  			// We just abandon indexes rather than returning an error here
   183  			return nil
   184  		}
   185  		if idx != nil && prefixCount >= 1 {
   186  			r.registryIdxes = append(r.registryIdxes, idx)
   187  			indexes = append(indexes, idxWithLen{idx, len(idx.Expressions()), prefixCount})
   188  		}
   189  	}
   190  
   191  	exprLen := len(exprStrs)
   192  	sort.Slice(indexes, func(i, j int) bool {
   193  		idxI := indexes[i]
   194  		idxJ := indexes[j]
   195  		if idxI.exprLen == exprLen && idxJ.exprLen != exprLen {
   196  			return true
   197  		} else if idxI.exprLen != exprLen && idxJ.exprLen == exprLen {
   198  			return false
   199  		} else if idxI.prefixCount != idxJ.prefixCount {
   200  			return idxI.prefixCount > idxJ.prefixCount
   201  			// TODO: ID() == "PRIMARY" is purely convention
   202  		} else if idxI.ID() == "PRIMARY" || idxJ.ID() == "PRIMARY" {
   203  			return idxI.ID() == "PRIMARY"
   204  		} else if idxI.exprLen != idxJ.exprLen {
   205  			return idxI.exprLen > idxJ.exprLen
   206  		} else {
   207  			return idxI.Index.ID() < idxJ.Index.ID()
   208  		}
   209  	})
   210  	sortedIndexes := make([]sql.Index, len(indexes))
   211  	for i := 0; i < len(sortedIndexes); i++ {
   212  		sortedIndexes[i] = indexes[i].Index
   213  	}
   214  	return sortedIndexes
   215  }
   216  
   217  // ExpressionsWithIndexes finds all the combinations of expressions with matching indexes. This only matches
   218  // multi-column indexes. Sorts the list of expressions by their length in descending order.
   219  func (r *indexAnalyzer) ExpressionsWithIndexes(db string, exprs ...sql.Expression) [][]sql.Expression {
   220  	var results [][]sql.Expression
   221  
   222  	// First find matches in the native indexes
   223  	for _, idxes := range r.indexesByTable {
   224  	Indexes:
   225  		for _, idx := range idxes {
   226  			var used = make(map[int]struct{})
   227  			var matched []sql.Expression
   228  			for _, ie := range idx.Expressions() {
   229  				var found bool
   230  				for i, e := range exprs {
   231  					if _, ok := used[i]; ok {
   232  						continue
   233  					}
   234  
   235  					if strings.EqualFold(ie, e.String()) {
   236  						used[i] = struct{}{}
   237  						found = true
   238  						matched = append(matched, e)
   239  						break
   240  					}
   241  				}
   242  
   243  				if !found {
   244  					break
   245  				}
   246  			}
   247  			if len(matched) == 0 {
   248  				continue Indexes
   249  			}
   250  
   251  			results = append(results, matched)
   252  		}
   253  	}
   254  
   255  	// Expand the search to the index registry if present
   256  	if r.indexRegistry != nil {
   257  		indexes := r.indexRegistry.ExpressionsWithIndexes(db, exprs...)
   258  		results = append(results, indexes...)
   259  	}
   260  
   261  	sort.SliceStable(results, func(i, j int) bool {
   262  		return len(results[i]) > len(results[j])
   263  	})
   264  	return results
   265  }
   266  
   267  // releaseUsedIndexes should be called in the top level function of index analysis to return any held res
   268  func (r *indexAnalyzer) releaseUsedIndexes() {
   269  	if r.indexRegistry == nil {
   270  		return
   271  	}
   272  
   273  	for _, i := range r.registryIdxes {
   274  		if i != nil {
   275  			r.indexRegistry.ReleaseIndex(i)
   276  		}
   277  	}
   278  }
   279  
   280  // exprsAreIndexSubset returns whether exprs are a subset of indexExprs. If they are a subset, then also returns how
   281  // many expressions are the prefix to the index expressions. If the first index expression is not present, then the scan
   282  // is equivalent to a table scan (which may have special optimizations that do not apply to an index scan). With at
   283  // least the first index expression (prefixCount >= 1), the searchable area for the index is limited, making an index
   284  // scan useful. It is assumed that indexExprs are ordered by their declaration. For example `INDEX (v3, v2, v1)` would
   285  // pass in `[]string{"v3", "v2", v1"}` and no other order.
   286  //
   287  // The returned prefixCount states how many expressions are a part of the index prefix. If len(exprs) == prefixCount
   288  // then all of the expressions are a prefix. If prefixCount == 0 then no expressions are part of the index prefix. This
   289  // is not recommended for direct index usage, but should instead be used for indexes that may intersect another.
   290  //
   291  // Using the above example index, the filter (v2 < 5 AND v1 < 5) is a subset but not a prefix. However, it may be
   292  // intersected with (v3 > 1 AND v1 > 1) which contains a prefix (but is not a prefix in its entirety).
   293  func exprsAreIndexSubset(exprs, indexExprs []string) (ok bool, prefixCount int) {
   294  	if len(exprs) > len(indexExprs) {
   295  		return false, 0
   296  	}
   297  
   298  	visitedIndexExprs := make([]bool, len(indexExprs))
   299  	for _, expr := range exprs {
   300  		found := false
   301  		for j, indexExpr := range indexExprs {
   302  			if visitedIndexExprs[j] {
   303  				continue
   304  			}
   305  			if strings.EqualFold(expr, indexExpr) {
   306  				visitedIndexExprs[j] = true
   307  				found = true
   308  				break
   309  			}
   310  		}
   311  		if !found {
   312  			return false, 0
   313  		}
   314  	}
   315  
   316  	// This checks the length of the prefix by checking how many true booleans are encountered before the first false
   317  	for i, visitedExpr := range visitedIndexExprs {
   318  		if visitedExpr {
   319  			continue
   320  		}
   321  		return true, i
   322  	}
   323  
   324  	return true, len(exprs)
   325  }