go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/gae/impl/memory/datastore_index_selection.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package memory 16 17 import ( 18 "bytes" 19 "fmt" 20 "sort" 21 "strings" 22 23 "go.chromium.org/luci/common/data/cmpbin" 24 "go.chromium.org/luci/common/data/stringset" 25 26 ds "go.chromium.org/luci/gae/service/datastore" 27 ) 28 29 // ErrMissingIndex is returned when the current indexes are not sufficient 30 // for the current query. 31 type ErrMissingIndex struct { 32 ns string 33 Missing *ds.IndexDefinition 34 } 35 36 func (e *ErrMissingIndex) Error() string { 37 yaml, err := e.Missing.YAMLString() 38 if err != nil { 39 panic(err) 40 } 41 return fmt.Sprintf( 42 "Insufficient indexes. Consider adding:\n%s", yaml) 43 } 44 45 // reducedQuery contains only the pieces of the query necessary to iterate for 46 // results. 47 // 48 // deduplication is applied externally 49 // projection / keysonly / entity retrieval is done externally 50 type reducedQuery struct { 51 kc ds.KeyContext 52 kind string 53 54 // eqFilters indicate the set of all prefix constraints which need to be 55 // fulfilled in the composite query. All of these will translate into prefix 56 // bytes for SOME index. 57 eqFilters map[string]stringset.Set 58 59 // suffixFormat is the PRECISE listing of the suffix columns that ALL indexes 60 // in the multi query will have. 61 // 62 // suffixFormat ALWAYS includes the inequality filter (if any) as the 0th 63 // element 64 // suffixFormat ALWAYS includes any additional projections (in ascending 65 // order) after all user defined sort orders 66 // suffixFormat ALWAYS has __key__ as the last column 67 suffixFormat []ds.IndexColumn 68 69 // limits of the inequality and/or full sort order. This is ONLY a suffix, 70 // and it will be appended to the prefix during iteration. 71 start []byte 72 end []byte 73 74 // metadata describing the total number of columns that this query requires to 75 // execute perfectly. 76 numCols int 77 } 78 79 type indexDefinitionSortable struct { 80 // eqFilts is the list of ACTUAL prefix columns. Note that it may contain 81 // redundant columns! (e.g. (tag, tag) is a perfectly valid prefix, becuase 82 // (tag=1, tag=2) is a perfectly valid query). 83 eqFilts []ds.IndexColumn 84 coll memCollection 85 } 86 87 func (i *indexDefinitionSortable) hasAncestor() bool { 88 return len(i.eqFilts) > 0 && i.eqFilts[0].Property == "__ancestor__" 89 } 90 91 func (i *indexDefinitionSortable) numEqHits(c *constraints) int { 92 ret := 0 93 for _, filt := range i.eqFilts { 94 if _, ok := c.constraints[filt.Property]; ok { 95 ret++ 96 } 97 } 98 return ret 99 } 100 101 type indexDefinitionSortableSlice []indexDefinitionSortable 102 103 func (idxs indexDefinitionSortableSlice) Len() int { return len(idxs) } 104 func (idxs indexDefinitionSortableSlice) Swap(i, j int) { idxs[i], idxs[j] = idxs[j], idxs[i] } 105 func (idxs indexDefinitionSortableSlice) Less(i, j int) bool { 106 a, b := idxs[i], idxs[j] 107 if a.coll == nil && b.coll != nil { 108 return true 109 } else if a.coll != nil && b.coll == nil { 110 return false 111 } 112 113 cmp := len(a.eqFilts) - len(b.eqFilts) 114 if cmp < 0 { 115 return true 116 } else if cmp > 0 { 117 return false 118 } 119 for k, col := range a.eqFilts { 120 ocol := b.eqFilts[k] 121 if !col.Descending && ocol.Descending { 122 return true 123 } else if col.Descending && !ocol.Descending { 124 return false 125 } 126 if col.Property < ocol.Property { 127 return true 128 } else if col.Property > ocol.Property { 129 return false 130 } 131 } 132 return false 133 } 134 135 // maybeAddDefinition possibly adds a new indexDefinitionSortable to this slice. 136 // It's only added if it could be useful in servicing q, otherwise this function 137 // is a noop. 138 // 139 // This returns true iff the proposed index is OK and depletes missingTerms to 140 // empty. 141 // 142 // If the proposed index is PERFECT (e.g. contains enough columns to cover all 143 // equality filters, and also has the correct suffix), idxs will be replaced 144 // with JUST that index, and this will return true. 145 func (idxs *indexDefinitionSortableSlice) maybeAddDefinition(q *reducedQuery, s memStore, missingTerms stringset.Set, id *ds.IndexDefinition) bool { 146 // Kindless queries are handled elsewhere. 147 if id.Kind != q.kind { 148 impossible( 149 fmt.Errorf("maybeAddDefinition given index with wrong kind %q v %q", id.Kind, q.kind)) 150 } 151 152 // If we're an ancestor query, and the index is compound, but doesn't include 153 // an Ancestor field, it doesn't work. Builtin indexes can be used for 154 // ancestor queries (and have !Ancestor), assuming that it's only equality 155 // filters (plus inequality on __key__), or a single inequality. 156 if q.eqFilters["__ancestor__"] != nil && !id.Ancestor && !id.Builtin() { 157 impossible( 158 fmt.Errorf("maybeAddDefinition given compound index with wrong ancestor info: %s %#v", id, q)) 159 } 160 161 // add __ancestor__ if necessary 162 sortBy := id.GetFullSortOrder() 163 164 // If the index has fewer fields than we need for the suffix, it can't 165 // possibly help. 166 if len(sortBy) < len(q.suffixFormat) { 167 return false 168 } 169 170 numEqFilts := len(sortBy) - len(q.suffixFormat) 171 // make sure the orders are precisely the same 172 for i, sb := range sortBy[numEqFilts:] { 173 if q.suffixFormat[i] != sb { 174 return false 175 } 176 } 177 178 if id.Builtin() && numEqFilts == 0 { 179 if len(q.eqFilters) > 1 || (len(q.eqFilters) == 1 && q.eqFilters["__ancestor__"] == nil) { 180 return false 181 } 182 if len(sortBy) > 1 && q.eqFilters["__ancestor__"] != nil { 183 return false 184 } 185 } 186 187 // Make sure the equalities section doesn't contain any properties we don't 188 // want in our query. 189 // 190 // numByProp && totalEqFilts will be used to see if this is a perfect match 191 // later. 192 numByProp := make(map[string]int, len(q.eqFilters)) 193 totalEqFilts := 0 194 195 eqFilts := sortBy[:numEqFilts] 196 for _, p := range eqFilts { 197 if _, ok := q.eqFilters[p.Property]; !ok { 198 return false 199 } 200 numByProp[p.Property]++ 201 totalEqFilts++ 202 } 203 204 // ok, we can actually use this 205 206 // Grab the collection for convenience later. We don't want to invalidate this 207 // index's potential just because the collection doesn't exist. If it's 208 // a builtin and it doesn't exist, it still needs to be one of the 'possible' 209 // indexes... it just means that the user's query will end up with no results. 210 coll := s.GetCollection( 211 fmt.Sprintf("idx:%s:%s", q.kc.Namespace, ds.Serialize.ToBytes(*id.PrepForIdxTable()))) 212 213 // First, see if it's a perfect match. If it is, then our search is over. 214 // 215 // A perfect match contains ALL the equality filter columns (or more, since 216 // we can use residuals to fill in the extras). 217 for _, sb := range eqFilts { 218 missingTerms.Del(sb.Property) 219 } 220 221 perfect := false 222 if len(sortBy) == q.numCols { 223 perfect = true 224 for k, num := range numByProp { 225 if num < q.eqFilters[k].Len() { 226 perfect = false 227 break 228 } 229 } 230 } 231 toAdd := indexDefinitionSortable{coll: coll, eqFilts: eqFilts} 232 if perfect { 233 *idxs = indexDefinitionSortableSlice{toAdd} 234 } else { 235 *idxs = append(*idxs, toAdd) 236 } 237 return missingTerms.Len() == 0 238 } 239 240 // getRelevantIndexes retrieves the relevant indexes which could be used to 241 // service q. It returns nil if it's not possible to service q with the current 242 // indexes. 243 func getRelevantIndexes(q *reducedQuery, s memStore) (indexDefinitionSortableSlice, error) { 244 missingTerms := stringset.New(len(q.eqFilters)) 245 for k := range q.eqFilters { 246 if k == "__ancestor__" { 247 // ancestor is not a prefix which can be satisfied by a single index. It 248 // must be satisfied by ALL indexes (and has special logic for this in 249 // the addDefinition logic) 250 continue 251 } 252 missingTerms.Add(k) 253 } 254 idxs := indexDefinitionSortableSlice{} 255 256 // First we add builtins 257 // add 258 // idx:KIND 259 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ 260 Kind: q.kind, 261 }) { 262 return idxs, nil 263 } 264 265 // add 266 // idx:KIND:prop 267 // idx:KIND:-prop 268 props := stringset.New(len(q.eqFilters) + len(q.suffixFormat)) 269 for prop := range q.eqFilters { 270 props.Add(prop) 271 } 272 for _, col := range q.suffixFormat[:len(q.suffixFormat)-1] { 273 props.Add(col.Property) 274 } 275 for _, prop := range props.ToSlice() { 276 if !isSpecialProp(prop) && (strings.HasPrefix(prop, "__") && strings.HasSuffix(prop, "__")) { 277 continue 278 } 279 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ 280 Kind: q.kind, 281 SortBy: []ds.IndexColumn{ 282 {Property: prop}, 283 }, 284 }) { 285 return idxs, nil 286 } 287 if idxs.maybeAddDefinition(q, s, missingTerms, &ds.IndexDefinition{ 288 Kind: q.kind, 289 SortBy: []ds.IndexColumn{ 290 {Property: prop, Descending: true}, 291 }, 292 }) { 293 return idxs, nil 294 } 295 } 296 297 // Try adding all compound indexes whose suffix matches. 298 suffix := &ds.IndexDefinition{ 299 Kind: q.kind, 300 Ancestor: q.eqFilters["__ancestor__"] != nil, 301 SortBy: q.suffixFormat, 302 } 303 walkCompIdxs(s, suffix, func(def *ds.IndexDefinition) bool { 304 // keep walking until we find a perfect index. 305 return !idxs.maybeAddDefinition(q, s, missingTerms, def) 306 }) 307 308 // this query is impossible to fulfill with the current indexes. Not all the 309 // terms (equality + projection) are satisfied. 310 if missingTerms.Len() > 0 || len(idxs) == 0 { 311 remains := &ds.IndexDefinition{ 312 Kind: q.kind, 313 Ancestor: q.eqFilters["__ancestor__"] != nil, 314 } 315 terms := missingTerms.ToSlice() 316 if serializationDeterministic { 317 sort.Strings(terms) 318 } 319 for _, term := range terms { 320 remains.SortBy = append(remains.SortBy, ds.IndexColumn{Property: term}) 321 } 322 remains.SortBy = append(remains.SortBy, q.suffixFormat...) 323 last := remains.SortBy[len(remains.SortBy)-1] 324 if !last.Descending { 325 // this removes the __key__ column, since it's implicit. 326 remains.SortBy = remains.SortBy[:len(remains.SortBy)-1] 327 } 328 if remains.Builtin() { 329 impossible( 330 fmt.Errorf("recommended missing index would be a builtin: %s", remains)) 331 } 332 return nil, &ErrMissingIndex{q.kc.Namespace, remains} 333 } 334 335 return idxs, nil 336 } 337 338 // generate generates a single iterDefinition for the given index. 339 func generate(q *reducedQuery, idx *indexDefinitionSortable, c *constraints) *iterDefinition { 340 def := &iterDefinition{ 341 c: idx.coll, 342 start: q.start, 343 end: q.end, 344 } 345 toJoin := make([][]byte, len(idx.eqFilts)) 346 for _, sb := range idx.eqFilts { 347 val := c.peel(sb.Property) 348 if sb.Descending { 349 val = cmpbin.InvertBytes(val) 350 } 351 toJoin = append(toJoin, val) 352 } 353 def.prefix = bytes.Join(toJoin, nil) 354 def.prefixLen = len(def.prefix) 355 356 if q.eqFilters["__ancestor__"] != nil && !idx.hasAncestor() { 357 // The query requires an ancestor, but the index doesn't explicitly have it 358 // as part of the prefix (otherwise it would have been the first eqFilt 359 // above). This happens when it's a builtin index, or if it's the primary 360 // index (for a kindless query), or if it's the Kind index (for a filterless 361 // query). 362 // 363 // builtin indexes are: 364 // Kind/__key__ 365 // Kind/Prop/__key__ 366 // Kind/Prop/-__key__ 367 if len(q.suffixFormat) > 2 || q.suffixFormat[len(q.suffixFormat)-1].Property != "__key__" { 368 // This should never happen. One of the previous validators would have 369 // selected a different index. But just in case. 370 impossible(fmt.Errorf("cannot supply an implicit ancestor for %#v", idx)) 371 } 372 373 // get the only value out of __ancestor__ 374 anc, _ := q.eqFilters["__ancestor__"].Peek() 375 376 // Intentionally do NOT update prefixLen. This allows multiIterator to 377 // correctly include the entire key in the shared iterator suffix, instead 378 // of just the remainder. 379 380 // chop the terminal null byte off the q.ancestor key... we can accept 381 // anything which is a descendant or an exact match. Removing the last byte 382 // from the key (the terminating null) allows this trick to work. Otherwise 383 // it would be a closed range of EXACTLY this key. 384 chopped := []byte(anc[:len(anc)-1]) 385 if q.suffixFormat[0].Descending { 386 chopped = cmpbin.InvertBytes(chopped) 387 } 388 def.prefix = cmpbin.ConcatBytes(def.prefix, chopped) 389 390 // Update start and end, since we know that if they contain anything, they 391 // contain values for the __key__ field. This is necessary because bytes 392 // are shifting from the suffix to the prefix, and start/end should only 393 // contain suffix (variable) bytes. 394 if def.start != nil { 395 if !bytes.HasPrefix(def.start, chopped) { 396 // again, shouldn't happen, but if it does, we want to know about it. 397 impossible(fmt.Errorf( 398 "start suffix for implied ancestor doesn't start with ancestor! start:%v ancestor:%v", 399 def.start, chopped)) 400 } 401 def.start = def.start[len(chopped):] 402 } 403 if def.end != nil { 404 if !bytes.HasPrefix(def.end, chopped) { 405 impossible(fmt.Errorf( 406 "end suffix for implied ancestor doesn't start with ancestor! end:%v ancestor:%v", 407 def.end, chopped)) 408 } 409 def.end = def.end[len(chopped):] 410 } 411 } 412 413 return def 414 } 415 416 type constraints struct { 417 constraints map[string][][]byte 418 original map[string][][]byte 419 residualMapping map[string]int 420 } 421 422 // peel picks a constraint value for the property. It then removes this value 423 // from constraints (possibly removing the entire row from constraints if it 424 // was the last value). If the value wasn't available in constraints, it picks 425 // the value from residuals. 426 func (c *constraints) peel(prop string) []byte { 427 ret := []byte(nil) 428 if vals, ok := c.constraints[prop]; ok { 429 ret = vals[0] 430 if len(vals) == 1 { 431 delete(c.constraints, prop) 432 } else { 433 c.constraints[prop] = vals[1:] 434 } 435 } else { 436 row := c.original[prop] 437 idx := c.residualMapping[prop] 438 c.residualMapping[prop]++ 439 ret = row[idx%len(row)] 440 } 441 return ret 442 } 443 444 func (c *constraints) empty() bool { 445 return len(c.constraints) == 0 446 } 447 448 // calculateConstraints produces a mapping of all equality filters to the values 449 // that they're constrained to. It also calculates residuals, which are an 450 // arbitrary value for filling index prefixes which have more equality fields 451 // than are necessary. The value doesn't matter, as long as its an equality 452 // constraint in the original query. 453 func calculateConstraints(q *reducedQuery) *constraints { 454 ret := &constraints{ 455 original: make(map[string][][]byte, len(q.eqFilters)), 456 constraints: make(map[string][][]byte, len(q.eqFilters)), 457 residualMapping: make(map[string]int), 458 } 459 for prop, vals := range q.eqFilters { 460 bvals := make([][]byte, 0, vals.Len()) 461 vals.Iter(func(val string) bool { 462 bvals = append(bvals, []byte(val)) 463 return true 464 }) 465 ret.original[prop] = bvals 466 if prop == "__ancestor__" { 467 // exclude __ancestor__ from the constraints. 468 // 469 // This is because it's handled specially during index proposal and 470 // generation. Ancestor is used by ALL indexes, and so its residual value 471 // in ret.original above will be sufficient. 472 continue 473 } 474 ret.constraints[prop] = bvals 475 } 476 return ret 477 } 478 479 // getIndexes returns a set of iterator definitions. Iterating over these 480 // will result in matching suffixes. 481 func getIndexes(q *reducedQuery, s memStore) ([]*iterDefinition, error) { 482 relevantIdxs := indexDefinitionSortableSlice(nil) 483 if q.kind == "" { 484 if coll := s.GetCollection("ents:" + q.kc.Namespace); coll != nil { 485 relevantIdxs = indexDefinitionSortableSlice{{coll: coll}} 486 } 487 } else { 488 err := error(nil) 489 relevantIdxs, err = getRelevantIndexes(q, s) 490 if err != nil { 491 return nil, err 492 } 493 } 494 if len(relevantIdxs) == 0 { 495 return nil, ds.ErrNullQuery 496 } 497 498 // This sorts it so that relevantIdxs goes less filters -> more filters. We 499 // traverse this list backwards, however, so we traverse it in more filters -> 500 // less filters order. 501 sort.Sort(relevantIdxs) 502 503 constraints := calculateConstraints(q) 504 505 ret := []*iterDefinition{} 506 for !constraints.empty() || len(ret) == 0 { 507 bestIdx := (*indexDefinitionSortable)(nil) 508 if len(ret) == 0 { 509 // if ret is empty, take the biggest relevantIdx. It's guaranteed to have 510 // the greatest number of equality filters of any index in the list, and 511 // we know that every equality filter will be pulled from constraints and 512 // not residual. 513 // 514 // This also takes care of the case when the query has no equality filters, 515 // in which case relevantIdxs will actually only contain one index anyway 516 // :) 517 bestIdx = &relevantIdxs[len(relevantIdxs)-1] 518 if bestIdx.coll == nil { 519 return nil, ds.ErrNullQuery 520 } 521 } else { 522 // If ret's not empty, then we need to find the best index we can. The 523 // best index will be the one with the most matching equality columns. 524 // Since relevantIdxs is sorted primarially by the number of equality 525 // columns, we walk down the list until the number of possible columns is 526 // worse than our best-so-far. 527 // 528 // Traversing the list backwards goes from more filters -> less filters, 529 // but also allows us to remove items from the list as we iterate over it. 530 bestNumEqHits := 0 531 for i := len(relevantIdxs) - 1; i >= 0; i-- { 532 idx := &relevantIdxs[i] 533 if len(idx.eqFilts) < bestNumEqHits { 534 // if the number of filters drops below our best hit, it's never going 535 // to get better than that. This index might be helpful on a later 536 // loop though, so don't remove it. 537 break 538 } 539 numHits := 0 540 if idx.coll != nil { 541 numHits = idx.numEqHits(constraints) 542 } 543 if numHits > bestNumEqHits { 544 bestNumEqHits = numHits 545 bestIdx = idx 546 } else if numHits == 0 { 547 // This index will never become useful again, so remove it. 548 relevantIdxs = append(relevantIdxs[:i], relevantIdxs[i+1:]...) 549 } 550 } 551 } 552 if bestIdx == nil { 553 // something is really wrong here... if relevantIdxs is !nil, then we 554 // should always be able to make progress in this loop. 555 impossible(fmt.Errorf("deadlock: cannot fulfil query?")) 556 } 557 ret = append(ret, generate(q, bestIdx, constraints)) 558 } 559 560 return ret, nil 561 }