// Source: github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/index_analyzer.go

// Copyright 2020-2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyzer

import (
	"sort"
	"strings"

	"github.com/dolthub/go-mysql-server/sql/transform"

	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/go-mysql-server/sql/plan"
)

// indexAnalyzer collects the indexes that are available for the tables of a single plan node and answers questions
// about which of those indexes match a set of expressions. Instances are built by newIndexAnalyzerForNode.
type indexAnalyzer struct {
	// indexesByTable holds the indexes reported by the tables themselves, keyed by the lower-cased table name (or
	// alias name, for aliased tables).
	//
	// TODO: these need to be qualified by database name as well to be valid. Otherwise we can't distinguish between two
	//  tables with the same name in different databases. But right now table nodes aren't qualified by their resolved
	//  database in the plan, so we can't do this.
	indexesByTable map[string][]sql.Index
	// indexRegistry is the context's index registry. newIndexAnalyzerForNode leaves it nil when the registry reports
	// that it has no indexes, and every method treats nil as "no registry".
	indexRegistry *sql.IndexRegistry
	// registryIdxes accumulates the indexes that MatchingIndexes obtained from indexRegistry so that
	// releaseUsedIndexes can hand them back to the registry.
	registryIdxes []sql.Index
}

// newIndexAnalyzerForNode returns an analyzer for indexes available in the node given, keyed by the table name. These
// might come from either the tables themselves natively, or else from an index driver that has indexes for the tables
// included in the nodes. Indexes are keyed by the aliased name of the table, if applicable. These names must be
// unaliased when matching against the names of tables in index definitions.
40 func newIndexAnalyzerForNode(ctx *sql.Context, n sql.Node) (*indexAnalyzer, error) { 41 var analysisErr error 42 indexes := make(map[string][]sql.Index) 43 44 var indexesForTable = func(name string, table sql.Table) error { 45 name = strings.ToLower(name) 46 it, ok := table.(sql.IndexAddressableTable) 47 48 if !ok { 49 return nil 50 } 51 52 idxes, err := it.GetIndexes(ctx) 53 if err != nil { 54 return err 55 } 56 57 indexes[name] = append(indexes[name], idxes...) 58 return nil 59 } 60 61 // Find all of the native indexed tables in the node (those that don't require a driver) 62 if n != nil { 63 transform.Inspect(n, func(n sql.Node) bool { 64 switch n := n.(type) { 65 // Because we previously pushed filters as close to their relevant tables as possible, we know that there 66 // cannot be another Filter between our node and any tables with relevant indexes. 67 case *plan.Filter: 68 return false 69 case *plan.TableAlias: 70 rt, ok := n.Child.(sql.TableNode) 71 if !ok { 72 return false 73 } 74 75 err := indexesForTable(n.Name(), rt.UnderlyingTable()) 76 if err != nil { 77 analysisErr = err 78 return false 79 } 80 81 return false 82 case *plan.ResolvedTable: 83 err := indexesForTable(n.Name(), n.UnderlyingTable()) 84 if err != nil { 85 analysisErr = err 86 return false 87 } 88 case *plan.IndexedTableAccess: 89 err := indexesForTable(n.Name(), n.TableNode.UnderlyingTable()) 90 if err != nil { 91 analysisErr = err 92 return false 93 } 94 } 95 return true 96 }) 97 } 98 99 if analysisErr != nil { 100 return nil, analysisErr 101 } 102 103 var idxRegistry *sql.IndexRegistry 104 if ctx.GetIndexRegistry().HasIndexes() { 105 idxRegistry = ctx.GetIndexRegistry() 106 } 107 108 return &indexAnalyzer{ 109 indexesByTable: indexes, 110 indexRegistry: idxRegistry, 111 }, nil 112 } 113 114 // IndexesByTable returns all indexes on the table named. The table must be present in the node used to create the 115 // analyzer. 
116 func (r *indexAnalyzer) IndexesByTable(ctx *sql.Context, db, table string) []sql.Index { 117 indexes := r.indexesByTable[strings.ToLower(table)] 118 119 if r.indexRegistry != nil { 120 idxes := r.indexRegistry.IndexesByTable(db, table) 121 for _, idx := range idxes { 122 indexes = append(indexes, idx) 123 } 124 } 125 126 return indexes 127 } 128 129 // MatchingIndex returns the index that best fits the given expressions. See MatchingIndexes for the rules regarding 130 // which index is considered the best. 131 func (r *indexAnalyzer) MatchingIndex(ctx *sql.Context, table, db string, exprs ...sql.Expression) sql.Index { 132 indexes := r.MatchingIndexes(ctx, table, db, exprs...) 133 if len(indexes) > 0 { 134 return indexes[0] 135 } 136 return nil 137 } 138 139 // MatchingIndexes returns a list of all matching indexes for the given expressions. The returned order of the indexes 140 // are deterministic and follow the given rules, from the highest priority in descending order: 141 // 142 // 1. Expressions exactly match the index 143 // 2. Expressions match as much of the index prefix as possible 144 // 3. Primary Key index ordered before secondary indexes 145 // TODO: for rule 3, we want to prioritize "covering" indexes over non-covering indexes, but sql.Index doesn't 146 // provide the necessary information to evaluate this condition. Primary Key status approximates it. 147 // 4. Largest index by expression count 148 // 5. Index ID in ascending order 149 // 150 // It is worth noting that all returned indexes will have at least the first index expression satisfied (creating a 151 // partial index), as otherwise the index would be no better than a table scan (for which integrators may have 152 // optimizations). 
153 func (r *indexAnalyzer) MatchingIndexes(ctx *sql.Context, table, db string, exprs ...sql.Expression) []sql.Index { 154 // As multiple expressions may be the same, we filter out duplicates 155 distinctExprs := make(map[string]struct{}) 156 var exprStrs []string 157 for _, e := range exprs { 158 es := strings.ToLower(e.String()) 159 if _, ok := distinctExprs[es]; !ok { 160 distinctExprs[es] = struct{}{} 161 exprStrs = append(exprStrs, es) 162 } 163 } 164 165 type idxWithLen struct { 166 sql.Index 167 exprLen int 168 prefixCount int 169 } 170 171 var indexes []idxWithLen 172 for _, idx := range r.indexesByTable[strings.ToLower(table)] { 173 indexExprs := idx.Expressions() 174 if ok, prefixCount := exprsAreIndexSubset(exprStrs, indexExprs); ok && prefixCount >= 1 { 175 indexes = append(indexes, idxWithLen{idx, len(indexExprs), prefixCount}) 176 } 177 } 178 179 if r.indexRegistry != nil { 180 idx, prefixCount, err := r.indexRegistry.MatchingIndex(ctx, db, exprs...) 181 if err != nil { 182 // We just abandon indexes rather than returning an error here 183 return nil 184 } 185 if idx != nil && prefixCount >= 1 { 186 r.registryIdxes = append(r.registryIdxes, idx) 187 indexes = append(indexes, idxWithLen{idx, len(idx.Expressions()), prefixCount}) 188 } 189 } 190 191 exprLen := len(exprStrs) 192 sort.Slice(indexes, func(i, j int) bool { 193 idxI := indexes[i] 194 idxJ := indexes[j] 195 if idxI.exprLen == exprLen && idxJ.exprLen != exprLen { 196 return true 197 } else if idxI.exprLen != exprLen && idxJ.exprLen == exprLen { 198 return false 199 } else if idxI.prefixCount != idxJ.prefixCount { 200 return idxI.prefixCount > idxJ.prefixCount 201 // TODO: ID() == "PRIMARY" is purely convention 202 } else if idxI.ID() == "PRIMARY" || idxJ.ID() == "PRIMARY" { 203 return idxI.ID() == "PRIMARY" 204 } else if idxI.exprLen != idxJ.exprLen { 205 return idxI.exprLen > idxJ.exprLen 206 } else { 207 return idxI.Index.ID() < idxJ.Index.ID() 208 } 209 }) 210 sortedIndexes := 
make([]sql.Index, len(indexes)) 211 for i := 0; i < len(sortedIndexes); i++ { 212 sortedIndexes[i] = indexes[i].Index 213 } 214 return sortedIndexes 215 } 216 217 // ExpressionsWithIndexes finds all the combinations of expressions with matching indexes. This only matches 218 // multi-column indexes. Sorts the list of expressions by their length in descending order. 219 func (r *indexAnalyzer) ExpressionsWithIndexes(db string, exprs ...sql.Expression) [][]sql.Expression { 220 var results [][]sql.Expression 221 222 // First find matches in the native indexes 223 for _, idxes := range r.indexesByTable { 224 Indexes: 225 for _, idx := range idxes { 226 var used = make(map[int]struct{}) 227 var matched []sql.Expression 228 for _, ie := range idx.Expressions() { 229 var found bool 230 for i, e := range exprs { 231 if _, ok := used[i]; ok { 232 continue 233 } 234 235 if strings.EqualFold(ie, e.String()) { 236 used[i] = struct{}{} 237 found = true 238 matched = append(matched, e) 239 break 240 } 241 } 242 243 if !found { 244 break 245 } 246 } 247 if len(matched) == 0 { 248 continue Indexes 249 } 250 251 results = append(results, matched) 252 } 253 } 254 255 // Expand the search to the index registry if present 256 if r.indexRegistry != nil { 257 indexes := r.indexRegistry.ExpressionsWithIndexes(db, exprs...) 258 results = append(results, indexes...) 259 } 260 261 sort.SliceStable(results, func(i, j int) bool { 262 return len(results[i]) > len(results[j]) 263 }) 264 return results 265 } 266 267 // releaseUsedIndexes should be called in the top level function of index analysis to return any held res 268 func (r *indexAnalyzer) releaseUsedIndexes() { 269 if r.indexRegistry == nil { 270 return 271 } 272 273 for _, i := range r.registryIdxes { 274 if i != nil { 275 r.indexRegistry.ReleaseIndex(i) 276 } 277 } 278 } 279 280 // exprsAreIndexSubset returns whether exprs are a subset of indexExprs. 
// If they are a subset, the second result reports how many of the index's leading expressions are covered by exprs.
// Without the first index expression an index scan is equivalent to a table scan (which may have special
// optimizations that do not apply to an index scan); with at least the first index expression (prefixCount >= 1) the
// searchable area of the index is limited, which makes an index scan useful. indexExprs are assumed to be in
// declaration order: `INDEX (v3, v2, v1)` is passed as `[]string{"v3", "v2", "v1"}` and in no other order.
// Comparison between expressions is case-insensitive.
//
// When prefixCount == len(exprs), every expression is part of the index prefix. When prefixCount == 0, none of them
// is; such a result is not recommended for direct index usage, but may be useful for indexes that are intersected
// with another.
//
// With the example index above, the filter (v2 < 5 AND v1 < 5) is a subset but not a prefix. It may however be
// intersected with (v3 > 1 AND v1 > 1), which contains a prefix (but is not a prefix in its entirety).
func exprsAreIndexSubset(exprs, indexExprs []string) (ok bool, prefixCount int) {
	// More expressions than index columns can never be a subset.
	if len(indexExprs) < len(exprs) {
		return false, 0
	}

	// covered[k] is set once indexExprs[k] has been paired with one of exprs; each index expression can be paired
	// at most once.
	covered := make([]bool, len(indexExprs))
	for _, want := range exprs {
		pos := -1
		for k := range indexExprs {
			if !covered[k] && strings.EqualFold(want, indexExprs[k]) {
				pos = k
				break
			}
		}
		if pos < 0 {
			return false, 0
		}
		covered[pos] = true
	}

	// The prefix length is the number of covered index expressions before the first uncovered one.
	for prefixCount < len(covered) && covered[prefixCount] {
		prefixCount++
	}
	return true, prefixCount
}