github.com/dolthub/go-mysql-server@v0.18.0/sql/index_registry.go

github.com/dolthub/go-mysql-server@v0.18.0/sql/index_registry.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sql
    16  
    17  import (
    18  	"sort"
    19  	"strings"
    20  	"sync"
    21  
    22  	"github.com/sirupsen/logrus"
    23  
    24  	"github.com/dolthub/go-mysql-server/internal/similartext"
    25  )
    26  
    27  // IndexRegistry keeps track of all driver-provided indexes in the engine.
    28  type IndexRegistry struct {
    29  	// Root path where all the data of the indexes is stored on disk.
    30  	Root string
    31  
    32  	mut        sync.RWMutex
    33  	indexes    map[indexKey]DriverIndex
    34  	indexOrder []indexKey
    35  	statuses   map[indexKey]IndexStatus
    36  
    37  	driversMut sync.RWMutex
    38  	drivers    map[string]IndexDriver
    39  
    40  	rcmut            sync.RWMutex
    41  	refCounts        map[indexKey]int
    42  	deleteIndexQueue map[indexKey]chan<- struct{}
    43  	indexLoaders     map[dbTableTuple][]func(ctx *Context) error
    44  }
    45  
    46  // NewIndexRegistry returns a new Index Registry.
    47  func NewIndexRegistry() *IndexRegistry {
    48  	return &IndexRegistry{
    49  		indexes:          make(map[indexKey]DriverIndex),
    50  		statuses:         make(map[indexKey]IndexStatus),
    51  		drivers:          make(map[string]IndexDriver),
    52  		refCounts:        make(map[indexKey]int),
    53  		deleteIndexQueue: make(map[indexKey]chan<- struct{}),
    54  		indexLoaders:     make(map[dbTableTuple][]func(ctx *Context) error),
    55  	}
    56  }
    57  
    58  // IndexDriver returns the IndexDriver with the given ID.
    59  func (r *IndexRegistry) IndexDriver(id string) IndexDriver {
    60  	r.driversMut.RLock()
    61  	defer r.driversMut.RUnlock()
    62  	return r.drivers[id]
    63  }
    64  
    65  // HasIndexes returns whether the index registry has any registered indexes. Not thread safe, so the answer is
    66  // approximate in the face of drivers and indexes being added and removed.
    67  func (r *IndexRegistry) HasIndexes() bool {
    68  	return len(r.indexes) > 0 || len(r.drivers) > 0
    69  }
    70  
    71  // DefaultIndexDriver returns the default index driver, which is the only
    72  // driver when there is 1 driver in the registry. If there are more than
    73  // 1 drivers in the registry, this will return the empty string, as there
    74  // is no clear default driver.
    75  func (r *IndexRegistry) DefaultIndexDriver() IndexDriver {
    76  	r.driversMut.RLock()
    77  	defer r.driversMut.RUnlock()
    78  	if len(r.drivers) == 1 {
    79  		for _, d := range r.drivers {
    80  			return d
    81  		}
    82  	}
    83  	return nil
    84  }
    85  
    86  // RegisterIndexDriver registers a new index driver.
    87  func (r *IndexRegistry) RegisterIndexDriver(driver IndexDriver) {
    88  	r.driversMut.Lock()
    89  	defer r.driversMut.Unlock()
    90  	r.drivers[driver.ID()] = driver
    91  }
    92  
    93  // LoadIndexes creates load functions for all indexes for all dbs, tables and drivers.  These functions are called
    94  // as needed by the query
    95  func (r *IndexRegistry) LoadIndexes(ctx *Context, dbs []Database) error {
    96  	r.driversMut.RLock()
    97  	defer r.driversMut.RUnlock()
    98  	r.mut.Lock()
    99  	defer r.mut.Unlock()
   100  
   101  	for drIdx := range r.drivers {
   102  		driver := r.drivers[drIdx]
   103  		for dbIdx := range dbs {
   104  			db := dbs[dbIdx]
   105  			tNames, err := db.GetTableNames(ctx)
   106  
   107  			if err != nil {
   108  				return err
   109  			}
   110  
   111  			for tIdx := range tNames {
   112  				tName := tNames[tIdx]
   113  
   114  				loadF := func(ctx *Context) error {
   115  					t, ok, err := db.GetTableInsensitive(ctx, tName)
   116  
   117  					if err != nil {
   118  						return err
   119  					} else if !ok {
   120  						panic("Failed to find table in list of table names")
   121  					}
   122  
   123  					indexes, err := driver.LoadAll(ctx, db.Name(), t.Name())
   124  					if err != nil {
   125  						return err
   126  					}
   127  
   128  					var checksum string
   129  					if c, ok := t.(Checksumable); ok && len(indexes) != 0 {
   130  						checksum, err = c.Checksum()
   131  						if err != nil {
   132  							return err
   133  						}
   134  					}
   135  
   136  					for _, idx := range indexes {
   137  						k := indexKey{db.Name(), idx.ID()}
   138  						r.indexes[k] = idx
   139  						r.indexOrder = append(r.indexOrder, k)
   140  
   141  						var idxChecksum string
   142  						if c, ok := idx.(Checksumable); ok {
   143  							idxChecksum, err = c.Checksum()
   144  							if err != nil {
   145  								return err
   146  							}
   147  						}
   148  
   149  						if checksum == "" || checksum == idxChecksum {
   150  							r.statuses[k] = IndexReady
   151  						} else {
   152  							logrus.Warnf(
   153  								"index %q is outdated and will not be used, you can remove it using `DROP INDEX %s ON %s`",
   154  								idx.ID(),
   155  								idx.ID(),
   156  								idx.Table(),
   157  							)
   158  							r.MarkOutdated(idx)
   159  						}
   160  					}
   161  
   162  					return nil
   163  				}
   164  
   165  				dbTT := dbTableTuple{db.Name(), tName}
   166  				r.indexLoaders[dbTT] = append(r.indexLoaders[dbTT], loadF)
   167  			}
   168  		}
   169  	}
   170  
   171  	return nil
   172  }
   173  
   174  func (r *IndexRegistry) registerIndexesForTable(ctx *Context, dbName, tName string) error {
   175  	r.driversMut.RLock()
   176  	defer r.driversMut.RUnlock()
   177  
   178  	dbTT := dbTableTuple{dbName, tName}
   179  
   180  	if loaders, ok := r.indexLoaders[dbTT]; ok {
   181  		for _, loader := range loaders {
   182  			err := loader(ctx)
   183  
   184  			if err != nil {
   185  				return err
   186  			}
   187  		}
   188  
   189  		delete(r.indexLoaders, dbTT)
   190  	}
   191  
   192  	return nil
   193  }
   194  
   195  // MarkOutdated sets the index status as outdated. This method is not thread
   196  // safe and should not be used directly except for testing.
   197  func (r *IndexRegistry) MarkOutdated(idx Index) {
   198  	r.statuses[indexKey{idx.Database(), idx.ID()}] = IndexOutdated
   199  }
   200  
   201  func (r *IndexRegistry) retainIndex(db, id string) {
   202  	r.rcmut.Lock()
   203  	defer r.rcmut.Unlock()
   204  	key := indexKey{db, id}
   205  	r.refCounts[key]++
   206  }
   207  
   208  // CanUseIndex returns whether the given index is ready to use or not.
   209  func (r *IndexRegistry) CanUseIndex(idx Index) bool {
   210  	r.mut.RLock()
   211  	defer r.mut.RUnlock()
   212  	return r.canUseIndex(idx)
   213  }
   214  
   215  // CanRemoveIndex returns whether the given index is ready to be removed.
   216  func (r *IndexRegistry) CanRemoveIndex(idx Index) bool {
   217  	if idx == nil {
   218  		return false
   219  	}
   220  
   221  	r.mut.RLock()
   222  	defer r.mut.RUnlock()
   223  	status := r.statuses[indexKey{idx.Database(), idx.ID()}]
   224  	return status == IndexReady || status == IndexOutdated
   225  }
   226  
   227  func (r *IndexRegistry) canUseIndex(idx Index) bool {
   228  	if idx == nil {
   229  		return false
   230  	}
   231  	return r.statuses[indexKey{idx.Database(), idx.ID()}].IsUsable()
   232  }
   233  
   234  // setStatus is not thread-safe, it should be guarded using mut.
   235  func (r *IndexRegistry) setStatus(idx Index, status IndexStatus) {
   236  	r.statuses[indexKey{idx.Database(), idx.ID()}] = status
   237  }
   238  
   239  // ReleaseIndex releases an index after it's been used.
   240  func (r *IndexRegistry) ReleaseIndex(idx Index) {
   241  	r.rcmut.Lock()
   242  	defer r.rcmut.Unlock()
   243  	key := indexKey{idx.Database(), idx.ID()}
   244  	r.refCounts[key]--
   245  	if r.refCounts[key] > 0 {
   246  		return
   247  	}
   248  
   249  	if ch, ok := r.deleteIndexQueue[key]; ok {
   250  		close(ch)
   251  		delete(r.deleteIndexQueue, key)
   252  	}
   253  }
   254  
   255  // Index returns the index with the given id. It may return nil if the index is
   256  // not found.
   257  func (r *IndexRegistry) Index(db, id string) DriverIndex {
   258  	r.mut.RLock()
   259  	defer r.mut.RUnlock()
   260  
   261  	r.retainIndex(db, id)
   262  	return r.indexes[indexKey{db, strings.ToLower(id)}]
   263  }
   264  
   265  // IndexesByTable returns a slice of all the indexes existing on the given table.
   266  func (r *IndexRegistry) IndexesByTable(db, table string) []DriverIndex {
   267  	r.mut.RLock()
   268  	defer r.mut.RUnlock()
   269  
   270  	var indexes []DriverIndex
   271  	for _, key := range r.indexOrder {
   272  		idx := r.indexes[key]
   273  		if idx.Database() == db && idx.Table() == table {
   274  			indexes = append(indexes, idx)
   275  			r.retainIndex(db, idx.ID())
   276  		}
   277  	}
   278  
   279  	return indexes
   280  }
   281  
   282  type exprWithTable interface {
   283  	Table() string
   284  }
   285  
   286  // MatchingIndex returns the index that best fits the given expressions. See analyzer.MatchingIndexes for the rules
   287  // regarding which index is considered the best. If no index matches then returns nil.
   288  func (r *IndexRegistry) MatchingIndex(ctx *Context, db string, expr ...Expression) (index Index, prefixCount int, err error) {
   289  	r.mut.RLock()
   290  	defer r.mut.RUnlock()
   291  
   292  	expressions := make([]string, len(expr))
   293  	for i, e := range expr {
   294  		expressions[i] = e.String()
   295  		var err error
   296  		Inspect(e, func(e Expression) bool {
   297  			if e == nil {
   298  				return true
   299  			}
   300  			if val, ok := e.(exprWithTable); ok {
   301  				iErr := r.registerIndexesForTable(ctx, db, val.Table())
   302  				if iErr != nil {
   303  					iErr = err
   304  				}
   305  			}
   306  			return true
   307  		})
   308  		if err != nil {
   309  			return nil, 0, err
   310  		}
   311  	}
   312  
   313  	type idxWithLen struct {
   314  		Index
   315  		exprLen     int
   316  		prefixCount int
   317  	}
   318  
   319  	var indexes []idxWithLen
   320  	for _, k := range r.indexOrder {
   321  		idx := r.indexes[k]
   322  		if !r.canUseIndex(idx) {
   323  			continue
   324  		}
   325  
   326  		if idx.Database() == db {
   327  			indexExprs := idx.Expressions()
   328  			if ok, pc := exprsAreIndexSubset(expressions, indexExprs); ok && pc >= 1 {
   329  				indexes = append(indexes, idxWithLen{idx, len(indexExprs), pc})
   330  			}
   331  		}
   332  	}
   333  	if len(indexes) == 0 {
   334  		return nil, 0, nil
   335  	}
   336  
   337  	exprLen := len(expressions)
   338  	sort.Slice(indexes, func(i, j int) bool {
   339  		idxI := indexes[i]
   340  		idxJ := indexes[j]
   341  		if idxI.exprLen == exprLen && idxJ.exprLen != exprLen {
   342  			return true
   343  		} else if idxI.exprLen != exprLen && idxJ.exprLen == exprLen {
   344  			return false
   345  		} else if idxI.prefixCount != idxJ.prefixCount {
   346  			return idxI.prefixCount > idxJ.prefixCount
   347  		} else if idxI.exprLen != idxJ.exprLen {
   348  			return idxI.exprLen > idxJ.exprLen
   349  		} else {
   350  			return idxI.Index.ID() < idxJ.Index.ID()
   351  		}
   352  	})
   353  	r.retainIndex(db, indexes[0].Index.ID())
   354  	return indexes[0].Index, indexes[0].prefixCount, nil
   355  }
   356  
   357  // ExpressionsWithIndexes finds all the combinations of expressions with
   358  // matching indexes. This only matches multi-column indexes.
   359  func (r *IndexRegistry) ExpressionsWithIndexes(
   360  	db string,
   361  	exprs ...Expression,
   362  ) [][]Expression {
   363  	r.mut.RLock()
   364  	defer r.mut.RUnlock()
   365  
   366  	var results [][]Expression
   367  Indexes:
   368  	for _, idx := range r.indexes {
   369  		if !r.canUseIndex(idx) {
   370  			continue
   371  		}
   372  
   373  		var used = make(map[int]struct{})
   374  		var matched []Expression
   375  		for _, ie := range idx.Expressions() {
   376  			var found bool
   377  			for i, e := range exprs {
   378  				if _, ok := used[i]; ok {
   379  					continue
   380  				}
   381  
   382  				if ie == e.String() {
   383  					used[i] = struct{}{}
   384  					found = true
   385  					matched = append(matched, e)
   386  					break
   387  				}
   388  			}
   389  
   390  			if !found {
   391  				break
   392  			}
   393  		}
   394  		if len(matched) == 0 {
   395  			continue Indexes
   396  		}
   397  
   398  		results = append(results, matched)
   399  	}
   400  
   401  	sort.SliceStable(results, func(i, j int) bool {
   402  		return len(results[i]) > len(results[j])
   403  	})
   404  	return results
   405  }
   406  
   407  func (r *IndexRegistry) validateIndexToAdd(idx Index) error {
   408  	r.mut.RLock()
   409  	defer r.mut.RUnlock()
   410  
   411  	for _, i := range r.indexes {
   412  		if i.Database() != idx.Database() {
   413  			continue
   414  		}
   415  
   416  		if i.ID() == idx.ID() {
   417  			return ErrIndexIDAlreadyRegistered.New(idx.ID())
   418  		}
   419  
   420  		if exprListsEqual(i.Expressions(), idx.Expressions()) {
   421  			return ErrIndexExpressionAlreadyRegistered.New(
   422  				strings.Join(idx.Expressions(), ", "),
   423  			)
   424  		}
   425  	}
   426  
   427  	return nil
   428  }
   429  
   430  // exprListsEqual returns whether a and b have the same items.
   431  func exprListsEqual(a, b []string) bool {
   432  	if len(a) != len(b) {
   433  		return false
   434  	}
   435  
   436  	var visited = make([]bool, len(b))
   437  
   438  	for _, va := range a {
   439  		found := false
   440  
   441  		for j, vb := range b {
   442  			if visited[j] {
   443  				continue
   444  			}
   445  
   446  			if va == vb {
   447  				visited[j] = true
   448  				found = true
   449  				break
   450  			}
   451  		}
   452  
   453  		if !found {
   454  			return false
   455  		}
   456  	}
   457  
   458  	return true
   459  }
   460  
   461  // TODO: move this somewhere so that it's not super public but doesn't create an import cycle
   462  // exprsAreIndexSubset returns whether exprs are a subset of indexExprs. If they are a subset, then also returns how
   463  // many expressions are the prefix to the index expressions. If the first index expression is not present, then the scan
   464  // is equivalent to a table scan (which may have special optimizations that do not apply to an index scan). With at
   465  // least the first index expression (prefixCount >= 1), the searchable area for the index is limited, making an index
   466  // scan useful. It is assumed that indexExprs are ordered by their declaration. For example `INDEX (v3, v2, v1)` would
   467  // pass in `[]string{"v3", "v2", v1"}` and no other order.
   468  //
   469  // The returned prefixCount states how many expressions are a part of the index prefix. If len(exprs) == prefixCount
   470  // then all of the expressions are a prefix. If prefixCount == 0 then no expressions are part of the index prefix. This
   471  // is not recommended for direct index usage, but should instead be used for indexes that may intersect another.
   472  //
   473  // Using the above example index, the filter (v2 < 5 AND v1 < 5) is a subset but not a prefix. However, it may be
   474  // intersected with (v3 > 1 AND v1 > 1) which contains a prefix (but is not a prefix in its entirety).
   475  func exprsAreIndexSubset(exprs, indexExprs []string) (ok bool, prefixCount int) {
   476  	if len(exprs) > len(indexExprs) {
   477  		return false, 0
   478  	}
   479  
   480  	visitedIndexExprs := make([]bool, len(indexExprs))
   481  	for _, expr := range exprs {
   482  		found := false
   483  		for j, indexExpr := range indexExprs {
   484  			if visitedIndexExprs[j] {
   485  				continue
   486  			}
   487  			if expr == indexExpr {
   488  				visitedIndexExprs[j] = true
   489  				found = true
   490  				break
   491  			}
   492  		}
   493  		if !found {
   494  			return false, 0
   495  		}
   496  	}
   497  
   498  	// This checks the length of the prefix by checking how many true booleans are encountered before the first false
   499  	for i, visitedExpr := range visitedIndexExprs {
   500  		if visitedExpr {
   501  			continue
   502  		}
   503  		return true, i
   504  	}
   505  
   506  	return true, len(exprs)
   507  }
   508  
   509  // AddIndex adds the given index to the registry. The added index will be
   510  // marked as creating, so nobody can register two indexes with the same
   511  // expression or id while the other is still being created.
   512  // When something is sent through the returned channel, it means the index has
   513  // finished its creation and will be marked as ready.
   514  // Another channel is returned to notify the user when the index is ready.
   515  func (r *IndexRegistry) AddIndex(
   516  	idx DriverIndex,
   517  ) (created chan<- struct{}, ready <-chan struct{}, err error) {
   518  	if err := r.validateIndexToAdd(idx); err != nil {
   519  		return nil, nil, err
   520  	}
   521  
   522  	r.mut.Lock()
   523  	r.setStatus(idx, IndexNotReady)
   524  	key := indexKey{idx.Database(), idx.ID()}
   525  	r.indexes[key] = idx
   526  	r.indexOrder = append(r.indexOrder, key)
   527  	r.mut.Unlock()
   528  
   529  	var _created = make(chan struct{})
   530  	var _ready = make(chan struct{})
   531  	go func() {
   532  		<-_created
   533  		r.mut.Lock()
   534  		defer r.mut.Unlock()
   535  		r.setStatus(idx, IndexReady)
   536  		close(_ready)
   537  	}()
   538  
   539  	return _created, _ready, nil
   540  }
   541  
   542  // DeleteIndex deletes an index from the registry by its id. First, it marks
   543  // the index for deletion but does not remove it, so queries that are using it
   544  // may still do so. The returned channel will send a message when the index can
   545  // be deleted from disk.
   546  // If force is true, it will delete the index even if it's not ready for usage.
   547  // Only use that parameter if you know what you're doing.
   548  func (r *IndexRegistry) DeleteIndex(db, id string, force bool) (<-chan struct{}, error) {
   549  	r.mut.RLock()
   550  	var key indexKey
   551  
   552  	if len(r.indexes) == 0 {
   553  		return nil, ErrIndexNotFound.New(id)
   554  	}
   555  
   556  	var indexNames []string
   557  
   558  	for k, idx := range r.indexes {
   559  		if strings.ToLower(id) == idx.ID() {
   560  			if !force && !r.CanRemoveIndex(idx) {
   561  				r.mut.RUnlock()
   562  				return nil, ErrIndexDeleteInvalidStatus.New(id)
   563  			}
   564  			r.setStatus(idx, IndexNotReady)
   565  			key = k
   566  			break
   567  		}
   568  		indexNames = append(indexNames, idx.ID())
   569  	}
   570  	r.mut.RUnlock()
   571  
   572  	if key.id == "" {
   573  		similar := similartext.Find(indexNames, id)
   574  		return nil, ErrIndexNotFound.New(id + similar)
   575  	}
   576  
   577  	var done = make(chan struct{}, 1)
   578  
   579  	r.rcmut.Lock()
   580  	// If no query is using this index just delete it right away
   581  	if force || r.refCounts[key] <= 0 {
   582  		r.mut.Lock()
   583  		defer r.mut.Unlock()
   584  		defer r.rcmut.Unlock()
   585  
   586  		delete(r.indexes, key)
   587  		var pos = -1
   588  		for i, k := range r.indexOrder {
   589  			if k == key {
   590  				pos = i
   591  				break
   592  			}
   593  		}
   594  		if pos >= 0 {
   595  			r.indexOrder = append(r.indexOrder[:pos], r.indexOrder[pos+1:]...)
   596  		}
   597  		close(done)
   598  		return done, nil
   599  	}
   600  
   601  	var onReadyToDelete = make(chan struct{})
   602  	r.deleteIndexQueue[key] = onReadyToDelete
   603  	r.rcmut.Unlock()
   604  
   605  	go func() {
   606  		<-onReadyToDelete
   607  		r.mut.Lock()
   608  		defer r.mut.Unlock()
   609  		delete(r.indexes, key)
   610  
   611  		done <- struct{}{}
   612  	}()
   613  
   614  	return done, nil
   615  }
   616  
   617  type indexKey struct {
   618  	db, id string
   619  }
   620  
   621  type dbTableTuple struct {
   622  	db, tbl string
   623  }
   624  
   625  // IndexStatus represents the current status in which the index is.
   626  type IndexStatus byte
   627  
   628  const (
   629  	// IndexNotReady means the index is not ready to be used.
   630  	IndexNotReady IndexStatus = iota
   631  	// IndexReady means the index can be used.
   632  	IndexReady
   633  	// IndexOutdated means the index is loaded but will not be used because the
   634  	// contents in it are outdated.
   635  	IndexOutdated
   636  )
   637  
   638  // IsUsable returns whether the index can be used or not based on the status.
   639  func (s IndexStatus) IsUsable() bool {
   640  	return s == IndexReady
   641  }
   642  
   643  func (s IndexStatus) String() string {
   644  	switch s {
   645  	case IndexReady:
   646  		return "ready"
   647  	default:
   648  		return "not ready"
   649  	}
   650  }