github.com/dolthub/go-mysql-server@v0.18.0/sql/index_registry.go (about) 1 // Copyright 2020-2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sql 16 17 import ( 18 "sort" 19 "strings" 20 "sync" 21 22 "github.com/sirupsen/logrus" 23 24 "github.com/dolthub/go-mysql-server/internal/similartext" 25 ) 26 27 // IndexRegistry keeps track of all driver-provided indexes in the engine. 28 type IndexRegistry struct { 29 // Root path where all the data of the indexes is stored on disk. 30 Root string 31 32 mut sync.RWMutex 33 indexes map[indexKey]DriverIndex 34 indexOrder []indexKey 35 statuses map[indexKey]IndexStatus 36 37 driversMut sync.RWMutex 38 drivers map[string]IndexDriver 39 40 rcmut sync.RWMutex 41 refCounts map[indexKey]int 42 deleteIndexQueue map[indexKey]chan<- struct{} 43 indexLoaders map[dbTableTuple][]func(ctx *Context) error 44 } 45 46 // NewIndexRegistry returns a new Index Registry. 47 func NewIndexRegistry() *IndexRegistry { 48 return &IndexRegistry{ 49 indexes: make(map[indexKey]DriverIndex), 50 statuses: make(map[indexKey]IndexStatus), 51 drivers: make(map[string]IndexDriver), 52 refCounts: make(map[indexKey]int), 53 deleteIndexQueue: make(map[indexKey]chan<- struct{}), 54 indexLoaders: make(map[dbTableTuple][]func(ctx *Context) error), 55 } 56 } 57 58 // IndexDriver returns the IndexDriver with the given ID. 
59 func (r *IndexRegistry) IndexDriver(id string) IndexDriver { 60 r.driversMut.RLock() 61 defer r.driversMut.RUnlock() 62 return r.drivers[id] 63 } 64 65 // HasIndexes returns whether the index registry has any registered indexes. Not thread safe, so the answer is 66 // approximate in the face of drivers and indexes being added and removed. 67 func (r *IndexRegistry) HasIndexes() bool { 68 return len(r.indexes) > 0 || len(r.drivers) > 0 69 } 70 71 // DefaultIndexDriver returns the default index driver, which is the only 72 // driver when there is 1 driver in the registry. If there are more than 73 // 1 drivers in the registry, this will return the empty string, as there 74 // is no clear default driver. 75 func (r *IndexRegistry) DefaultIndexDriver() IndexDriver { 76 r.driversMut.RLock() 77 defer r.driversMut.RUnlock() 78 if len(r.drivers) == 1 { 79 for _, d := range r.drivers { 80 return d 81 } 82 } 83 return nil 84 } 85 86 // RegisterIndexDriver registers a new index driver. 87 func (r *IndexRegistry) RegisterIndexDriver(driver IndexDriver) { 88 r.driversMut.Lock() 89 defer r.driversMut.Unlock() 90 r.drivers[driver.ID()] = driver 91 } 92 93 // LoadIndexes creates load functions for all indexes for all dbs, tables and drivers. 
These functions are called 94 // as needed by the query 95 func (r *IndexRegistry) LoadIndexes(ctx *Context, dbs []Database) error { 96 r.driversMut.RLock() 97 defer r.driversMut.RUnlock() 98 r.mut.Lock() 99 defer r.mut.Unlock() 100 101 for drIdx := range r.drivers { 102 driver := r.drivers[drIdx] 103 for dbIdx := range dbs { 104 db := dbs[dbIdx] 105 tNames, err := db.GetTableNames(ctx) 106 107 if err != nil { 108 return err 109 } 110 111 for tIdx := range tNames { 112 tName := tNames[tIdx] 113 114 loadF := func(ctx *Context) error { 115 t, ok, err := db.GetTableInsensitive(ctx, tName) 116 117 if err != nil { 118 return err 119 } else if !ok { 120 panic("Failed to find table in list of table names") 121 } 122 123 indexes, err := driver.LoadAll(ctx, db.Name(), t.Name()) 124 if err != nil { 125 return err 126 } 127 128 var checksum string 129 if c, ok := t.(Checksumable); ok && len(indexes) != 0 { 130 checksum, err = c.Checksum() 131 if err != nil { 132 return err 133 } 134 } 135 136 for _, idx := range indexes { 137 k := indexKey{db.Name(), idx.ID()} 138 r.indexes[k] = idx 139 r.indexOrder = append(r.indexOrder, k) 140 141 var idxChecksum string 142 if c, ok := idx.(Checksumable); ok { 143 idxChecksum, err = c.Checksum() 144 if err != nil { 145 return err 146 } 147 } 148 149 if checksum == "" || checksum == idxChecksum { 150 r.statuses[k] = IndexReady 151 } else { 152 logrus.Warnf( 153 "index %q is outdated and will not be used, you can remove it using `DROP INDEX %s ON %s`", 154 idx.ID(), 155 idx.ID(), 156 idx.Table(), 157 ) 158 r.MarkOutdated(idx) 159 } 160 } 161 162 return nil 163 } 164 165 dbTT := dbTableTuple{db.Name(), tName} 166 r.indexLoaders[dbTT] = append(r.indexLoaders[dbTT], loadF) 167 } 168 } 169 } 170 171 return nil 172 } 173 174 func (r *IndexRegistry) registerIndexesForTable(ctx *Context, dbName, tName string) error { 175 r.driversMut.RLock() 176 defer r.driversMut.RUnlock() 177 178 dbTT := dbTableTuple{dbName, tName} 179 180 if loaders, ok := 
r.indexLoaders[dbTT]; ok { 181 for _, loader := range loaders { 182 err := loader(ctx) 183 184 if err != nil { 185 return err 186 } 187 } 188 189 delete(r.indexLoaders, dbTT) 190 } 191 192 return nil 193 } 194 195 // MarkOutdated sets the index status as outdated. This method is not thread 196 // safe and should not be used directly except for testing. 197 func (r *IndexRegistry) MarkOutdated(idx Index) { 198 r.statuses[indexKey{idx.Database(), idx.ID()}] = IndexOutdated 199 } 200 201 func (r *IndexRegistry) retainIndex(db, id string) { 202 r.rcmut.Lock() 203 defer r.rcmut.Unlock() 204 key := indexKey{db, id} 205 r.refCounts[key]++ 206 } 207 208 // CanUseIndex returns whether the given index is ready to use or not. 209 func (r *IndexRegistry) CanUseIndex(idx Index) bool { 210 r.mut.RLock() 211 defer r.mut.RUnlock() 212 return r.canUseIndex(idx) 213 } 214 215 // CanRemoveIndex returns whether the given index is ready to be removed. 216 func (r *IndexRegistry) CanRemoveIndex(idx Index) bool { 217 if idx == nil { 218 return false 219 } 220 221 r.mut.RLock() 222 defer r.mut.RUnlock() 223 status := r.statuses[indexKey{idx.Database(), idx.ID()}] 224 return status == IndexReady || status == IndexOutdated 225 } 226 227 func (r *IndexRegistry) canUseIndex(idx Index) bool { 228 if idx == nil { 229 return false 230 } 231 return r.statuses[indexKey{idx.Database(), idx.ID()}].IsUsable() 232 } 233 234 // setStatus is not thread-safe, it should be guarded using mut. 235 func (r *IndexRegistry) setStatus(idx Index, status IndexStatus) { 236 r.statuses[indexKey{idx.Database(), idx.ID()}] = status 237 } 238 239 // ReleaseIndex releases an index after it's been used. 
240 func (r *IndexRegistry) ReleaseIndex(idx Index) { 241 r.rcmut.Lock() 242 defer r.rcmut.Unlock() 243 key := indexKey{idx.Database(), idx.ID()} 244 r.refCounts[key]-- 245 if r.refCounts[key] > 0 { 246 return 247 } 248 249 if ch, ok := r.deleteIndexQueue[key]; ok { 250 close(ch) 251 delete(r.deleteIndexQueue, key) 252 } 253 } 254 255 // Index returns the index with the given id. It may return nil if the index is 256 // not found. 257 func (r *IndexRegistry) Index(db, id string) DriverIndex { 258 r.mut.RLock() 259 defer r.mut.RUnlock() 260 261 r.retainIndex(db, id) 262 return r.indexes[indexKey{db, strings.ToLower(id)}] 263 } 264 265 // IndexesByTable returns a slice of all the indexes existing on the given table. 266 func (r *IndexRegistry) IndexesByTable(db, table string) []DriverIndex { 267 r.mut.RLock() 268 defer r.mut.RUnlock() 269 270 var indexes []DriverIndex 271 for _, key := range r.indexOrder { 272 idx := r.indexes[key] 273 if idx.Database() == db && idx.Table() == table { 274 indexes = append(indexes, idx) 275 r.retainIndex(db, idx.ID()) 276 } 277 } 278 279 return indexes 280 } 281 282 type exprWithTable interface { 283 Table() string 284 } 285 286 // MatchingIndex returns the index that best fits the given expressions. See analyzer.MatchingIndexes for the rules 287 // regarding which index is considered the best. If no index matches then returns nil. 
288 func (r *IndexRegistry) MatchingIndex(ctx *Context, db string, expr ...Expression) (index Index, prefixCount int, err error) { 289 r.mut.RLock() 290 defer r.mut.RUnlock() 291 292 expressions := make([]string, len(expr)) 293 for i, e := range expr { 294 expressions[i] = e.String() 295 var err error 296 Inspect(e, func(e Expression) bool { 297 if e == nil { 298 return true 299 } 300 if val, ok := e.(exprWithTable); ok { 301 iErr := r.registerIndexesForTable(ctx, db, val.Table()) 302 if iErr != nil { 303 iErr = err 304 } 305 } 306 return true 307 }) 308 if err != nil { 309 return nil, 0, err 310 } 311 } 312 313 type idxWithLen struct { 314 Index 315 exprLen int 316 prefixCount int 317 } 318 319 var indexes []idxWithLen 320 for _, k := range r.indexOrder { 321 idx := r.indexes[k] 322 if !r.canUseIndex(idx) { 323 continue 324 } 325 326 if idx.Database() == db { 327 indexExprs := idx.Expressions() 328 if ok, pc := exprsAreIndexSubset(expressions, indexExprs); ok && pc >= 1 { 329 indexes = append(indexes, idxWithLen{idx, len(indexExprs), pc}) 330 } 331 } 332 } 333 if len(indexes) == 0 { 334 return nil, 0, nil 335 } 336 337 exprLen := len(expressions) 338 sort.Slice(indexes, func(i, j int) bool { 339 idxI := indexes[i] 340 idxJ := indexes[j] 341 if idxI.exprLen == exprLen && idxJ.exprLen != exprLen { 342 return true 343 } else if idxI.exprLen != exprLen && idxJ.exprLen == exprLen { 344 return false 345 } else if idxI.prefixCount != idxJ.prefixCount { 346 return idxI.prefixCount > idxJ.prefixCount 347 } else if idxI.exprLen != idxJ.exprLen { 348 return idxI.exprLen > idxJ.exprLen 349 } else { 350 return idxI.Index.ID() < idxJ.Index.ID() 351 } 352 }) 353 r.retainIndex(db, indexes[0].Index.ID()) 354 return indexes[0].Index, indexes[0].prefixCount, nil 355 } 356 357 // ExpressionsWithIndexes finds all the combinations of expressions with 358 // matching indexes. This only matches multi-column indexes. 
359 func (r *IndexRegistry) ExpressionsWithIndexes( 360 db string, 361 exprs ...Expression, 362 ) [][]Expression { 363 r.mut.RLock() 364 defer r.mut.RUnlock() 365 366 var results [][]Expression 367 Indexes: 368 for _, idx := range r.indexes { 369 if !r.canUseIndex(idx) { 370 continue 371 } 372 373 var used = make(map[int]struct{}) 374 var matched []Expression 375 for _, ie := range idx.Expressions() { 376 var found bool 377 for i, e := range exprs { 378 if _, ok := used[i]; ok { 379 continue 380 } 381 382 if ie == e.String() { 383 used[i] = struct{}{} 384 found = true 385 matched = append(matched, e) 386 break 387 } 388 } 389 390 if !found { 391 break 392 } 393 } 394 if len(matched) == 0 { 395 continue Indexes 396 } 397 398 results = append(results, matched) 399 } 400 401 sort.SliceStable(results, func(i, j int) bool { 402 return len(results[i]) > len(results[j]) 403 }) 404 return results 405 } 406 407 func (r *IndexRegistry) validateIndexToAdd(idx Index) error { 408 r.mut.RLock() 409 defer r.mut.RUnlock() 410 411 for _, i := range r.indexes { 412 if i.Database() != idx.Database() { 413 continue 414 } 415 416 if i.ID() == idx.ID() { 417 return ErrIndexIDAlreadyRegistered.New(idx.ID()) 418 } 419 420 if exprListsEqual(i.Expressions(), idx.Expressions()) { 421 return ErrIndexExpressionAlreadyRegistered.New( 422 strings.Join(idx.Expressions(), ", "), 423 ) 424 } 425 } 426 427 return nil 428 } 429 430 // exprListsEqual returns whether a and b have the same items. 
// Order is not significant, but an item that occurs several times must occur
// the same number of times in both lists.
func exprListsEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	// Count the items of b, then consume one count per item of a. Because the
	// lengths are equal, never running out of a count means the two lists
	// hold exactly the same items.
	remaining := make(map[string]int, len(b))
	for _, item := range b {
		remaining[item]++
	}

	for _, item := range a {
		if remaining[item] == 0 {
			return false
		}
		remaining[item]--
	}

	return true
}

// TODO: move this somewhere so that it's not super public but doesn't create an import cycle
// exprsAreIndexSubset returns whether exprs are a subset of indexExprs. If they are a subset, then also returns how
// many expressions are the prefix to the index expressions. If the first index expression is not present, then the scan
// is equivalent to a table scan (which may have special optimizations that do not apply to an index scan). With at
// least the first index expression (prefixCount >= 1), the searchable area for the index is limited, making an index
// scan useful. It is assumed that indexExprs are ordered by their declaration. For example `INDEX (v3, v2, v1)` would
// pass in `[]string{"v3", "v2", "v1"}` and no other order.
//
// The returned prefixCount states how many expressions are a part of the index prefix. If len(exprs) == prefixCount
// then all of the expressions are a prefix. If prefixCount == 0 then no expressions are part of the index prefix. This
// is not recommended for direct index usage, but should instead be used for indexes that may intersect another.
//
// Using the above example index, the filter (v2 < 5 AND v1 < 5) is a subset but not a prefix. However, it may be
// intersected with (v3 > 1 AND v1 > 1) which contains a prefix (but is not a prefix in its entirety).
475 func exprsAreIndexSubset(exprs, indexExprs []string) (ok bool, prefixCount int) { 476 if len(exprs) > len(indexExprs) { 477 return false, 0 478 } 479 480 visitedIndexExprs := make([]bool, len(indexExprs)) 481 for _, expr := range exprs { 482 found := false 483 for j, indexExpr := range indexExprs { 484 if visitedIndexExprs[j] { 485 continue 486 } 487 if expr == indexExpr { 488 visitedIndexExprs[j] = true 489 found = true 490 break 491 } 492 } 493 if !found { 494 return false, 0 495 } 496 } 497 498 // This checks the length of the prefix by checking how many true booleans are encountered before the first false 499 for i, visitedExpr := range visitedIndexExprs { 500 if visitedExpr { 501 continue 502 } 503 return true, i 504 } 505 506 return true, len(exprs) 507 } 508 509 // AddIndex adds the given index to the registry. The added index will be 510 // marked as creating, so nobody can register two indexes with the same 511 // expression or id while the other is still being created. 512 // When something is sent through the returned channel, it means the index has 513 // finished its creation and will be marked as ready. 514 // Another channel is returned to notify the user when the index is ready. 515 func (r *IndexRegistry) AddIndex( 516 idx DriverIndex, 517 ) (created chan<- struct{}, ready <-chan struct{}, err error) { 518 if err := r.validateIndexToAdd(idx); err != nil { 519 return nil, nil, err 520 } 521 522 r.mut.Lock() 523 r.setStatus(idx, IndexNotReady) 524 key := indexKey{idx.Database(), idx.ID()} 525 r.indexes[key] = idx 526 r.indexOrder = append(r.indexOrder, key) 527 r.mut.Unlock() 528 529 var _created = make(chan struct{}) 530 var _ready = make(chan struct{}) 531 go func() { 532 <-_created 533 r.mut.Lock() 534 defer r.mut.Unlock() 535 r.setStatus(idx, IndexReady) 536 close(_ready) 537 }() 538 539 return _created, _ready, nil 540 } 541 542 // DeleteIndex deletes an index from the registry by its id. 
First, it marks 543 // the index for deletion but does not remove it, so queries that are using it 544 // may still do so. The returned channel will send a message when the index can 545 // be deleted from disk. 546 // If force is true, it will delete the index even if it's not ready for usage. 547 // Only use that parameter if you know what you're doing. 548 func (r *IndexRegistry) DeleteIndex(db, id string, force bool) (<-chan struct{}, error) { 549 r.mut.RLock() 550 var key indexKey 551 552 if len(r.indexes) == 0 { 553 return nil, ErrIndexNotFound.New(id) 554 } 555 556 var indexNames []string 557 558 for k, idx := range r.indexes { 559 if strings.ToLower(id) == idx.ID() { 560 if !force && !r.CanRemoveIndex(idx) { 561 r.mut.RUnlock() 562 return nil, ErrIndexDeleteInvalidStatus.New(id) 563 } 564 r.setStatus(idx, IndexNotReady) 565 key = k 566 break 567 } 568 indexNames = append(indexNames, idx.ID()) 569 } 570 r.mut.RUnlock() 571 572 if key.id == "" { 573 similar := similartext.Find(indexNames, id) 574 return nil, ErrIndexNotFound.New(id + similar) 575 } 576 577 var done = make(chan struct{}, 1) 578 579 r.rcmut.Lock() 580 // If no query is using this index just delete it right away 581 if force || r.refCounts[key] <= 0 { 582 r.mut.Lock() 583 defer r.mut.Unlock() 584 defer r.rcmut.Unlock() 585 586 delete(r.indexes, key) 587 var pos = -1 588 for i, k := range r.indexOrder { 589 if k == key { 590 pos = i 591 break 592 } 593 } 594 if pos >= 0 { 595 r.indexOrder = append(r.indexOrder[:pos], r.indexOrder[pos+1:]...) 
596 } 597 close(done) 598 return done, nil 599 } 600 601 var onReadyToDelete = make(chan struct{}) 602 r.deleteIndexQueue[key] = onReadyToDelete 603 r.rcmut.Unlock() 604 605 go func() { 606 <-onReadyToDelete 607 r.mut.Lock() 608 defer r.mut.Unlock() 609 delete(r.indexes, key) 610 611 done <- struct{}{} 612 }() 613 614 return done, nil 615 } 616 617 type indexKey struct { 618 db, id string 619 } 620 621 type dbTableTuple struct { 622 db, tbl string 623 } 624 625 // IndexStatus represents the current status in which the index is. 626 type IndexStatus byte 627 628 const ( 629 // IndexNotReady means the index is not ready to be used. 630 IndexNotReady IndexStatus = iota 631 // IndexReady means the index can be used. 632 IndexReady 633 // IndexOutdated means the index is loaded but will not be used because the 634 // contents in it are outdated. 635 IndexOutdated 636 ) 637 638 // IsUsable returns whether the index can be used or not based on the status. 639 func (s IndexStatus) IsUsable() bool { 640 return s == IndexReady 641 } 642 643 func (s IndexStatus) String() string { 644 switch s { 645 case IndexReady: 646 return "ready" 647 default: 648 return "not ready" 649 } 650 }