github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/range_cache.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvcoord 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "strconv" 18 "strings" 19 "sync" 20 "time" 21 22 "github.com/biogo/store/llrb" 23 "github.com/cockroachdb/cockroach/pkg/keys" 24 "github.com/cockroachdb/cockroach/pkg/kv/kvbase" 25 "github.com/cockroachdb/cockroach/pkg/roachpb" 26 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 27 "github.com/cockroachdb/cockroach/pkg/util/cache" 28 "github.com/cockroachdb/cockroach/pkg/util/contextutil" 29 "github.com/cockroachdb/cockroach/pkg/util/log" 30 "github.com/cockroachdb/cockroach/pkg/util/stop" 31 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 32 "github.com/cockroachdb/cockroach/pkg/util/syncutil/singleflight" 33 "github.com/cockroachdb/cockroach/pkg/util/tracing" 34 "github.com/cockroachdb/errors" 35 "github.com/cockroachdb/logtags" 36 "github.com/opentracing/opentracing-go" 37 ) 38 39 // rangeCacheKey is the key type used to store and sort values in the 40 // RangeCache. 41 type rangeCacheKey roachpb.RKey 42 43 func (a rangeCacheKey) String() string { 44 return roachpb.Key(a).String() 45 } 46 47 // Compare implements the llrb.Comparable interface for rangeCacheKey, so that 48 // it can be used as a key for util.OrderedCache. 49 func (a rangeCacheKey) Compare(b llrb.Comparable) int { 50 return bytes.Compare(a, b.(rangeCacheKey)) 51 } 52 53 // RangeDescriptorDB is a type which can query range descriptors from an 54 // underlying datastore. This interface is used by RangeDescriptorCache to 55 // initially retrieve information which will be cached. 56 type RangeDescriptorDB interface { 57 // RangeLookup takes a key to look up descriptors for. Two slices of range 58 // descriptors are returned. The first of these slices holds descriptors 59 // whose [startKey,endKey) spans contain the given key (possibly from 60 // intents), and the second holds prefetched adjacent descriptors. 61 RangeLookup( 62 ctx context.Context, key roachpb.RKey, useReverseScan bool, 63 ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) 64 65 // FirstRange returns the descriptor for the first Range. This is the 66 // Range containing all meta1 entries. 67 FirstRange() (*roachpb.RangeDescriptor, error) 68 } 69 70 // RangeDescriptorCache is used to retrieve range descriptors for 71 // arbitrary keys. Descriptors are initially queried from storage 72 // using a RangeDescriptorDB, but are cached for subsequent lookups. 73 type RangeDescriptorCache struct { 74 st *cluster.Settings 75 stopper *stop.Stopper 76 // RangeDescriptorDB is used to retrieve range descriptors from the 77 // database, which will be cached by this structure. 78 db RangeDescriptorDB 79 // rangeCache caches replica metadata for key ranges. The cache is 80 // filled while servicing read and write requests to the key value 81 // store. 82 rangeCache struct { 83 syncutil.RWMutex 84 cache *cache.OrderedCache 85 } 86 // lookupRequests stores all inflight requests retrieving range 87 // descriptors from the database. It allows multiple RangeDescriptorDB 88 // lookup requests for the same inferred range descriptor to be 89 // multiplexed onto the same database lookup. See makeLookupRequestKey 90 // for details on this inference. 91 lookupRequests singleflight.Group 92 93 // coalesced, if not nil, is sent on every time a request is coalesced onto 94 // another in-flight one. Used by tests to block until a lookup request is 95 // blocked on the single-flight querying the db. 96 coalesced chan struct{} 97 } 98 99 // RangeDescriptorCache implements the kvbase interface. 100 var _ kvbase.RangeDescriptorCache = (*RangeDescriptorCache)(nil) 101 102 type lookupResult struct { 103 desc *roachpb.RangeDescriptor 104 evictToken *EvictionToken 105 } 106 107 // makeLookupRequestKey constructs a key for the lookupRequest group with the 108 // goal of mapping all requests which are inferred to be looking for the same 109 // descriptor onto the same request key to establish request coalescing. 110 // 111 // If the key is part of a descriptor that we previously had cached (but the 112 // cache entry is stale), we use that previous descriptor to coalesce all 113 // requests for keys within it into a single request. Namely, there are three 114 // possible events that may have happened causing our cache to be stale. For 115 // each of these, we try to coalesce all requests that will end up on the same 116 // range post-event together. 117 // - Split: for a split, only the right half of the split will attempt to evict 118 // the stale descriptor because only the right half will be sending to 119 // the wrong range. Once this stale descriptor is evicted, keys from 120 // both halves of the split will miss the cache. Because both sides of 121 // the split will now map to the same lookupResult, it is important to 122 // use EvictAndReplace if possible to insert one of the two new descriptors. 123 // This way, no requests to that descriptor will ever miss the cache and 124 // risk being coalesced into the other request. If this is not possible, 125 // the lookup will still work, but it will require multiple lookups, which 126 // will be launched in series when requests find that their desired key 127 // is outside of the returned descriptor. 128 // - Merges: for a merge, the left half of the merge will never notice. The right 129 // half of the merge will suddenly find its descriptor to be stale, so 130 // it will evict and lookup the new descriptor. We set the key to hash 131 // to the start of the stale descriptor for lookup requests to the right 132 // half of the merge so that all requests will be coalesced to the same 133 // lookupRequest. 134 // - Rebal: for a rebalance, the entire descriptor will suddenly go stale and 135 // requests to it will evict the descriptor. We set the key to hash to 136 // the start of the stale descriptor for lookup requests to the rebalanced 137 // descriptor so that all requests will be coalesced to the same lookupRequest. 138 // 139 // Note that the above description assumes that useReverseScan is false for simplicity. 140 // If useReverseScan is true, we need to use the end key of the stale descriptor instead. 141 func makeLookupRequestKey( 142 key roachpb.RKey, prevDesc *roachpb.RangeDescriptor, useReverseScan bool, 143 ) string { 144 var ret strings.Builder 145 // We only want meta1, meta2, user range lookups to be coalesced with other 146 // meta1, meta2, user range lookups, respectively. Otherwise, deadlocks could 147 // happen due to singleflight. If the range lookup is in a meta range, we 148 // prefix the request key with the corresponding meta prefix to disambiguate 149 // the different lookups. 150 if key.AsRawKey().Compare(keys.Meta1KeyMax) < 0 { 151 ret.Write(keys.Meta1Prefix) 152 } else if key.AsRawKey().Compare(keys.Meta2KeyMax) < 0 { 153 ret.Write(keys.Meta2Prefix) 154 } 155 if prevDesc != nil { 156 if useReverseScan { 157 key = prevDesc.EndKey 158 } else { 159 key = prevDesc.StartKey 160 } 161 } 162 ret.Write(key) 163 ret.WriteString(":") 164 ret.WriteString(strconv.FormatBool(useReverseScan)) 165 // Add the generation of the previous descriptor to the lookup request key to 166 // decrease the number of lookups in the rare double split case. Suppose we 167 // have a range [a, e) that gets split into [a, c) and [c, e). The requests 168 // on [c, e) will fail and will have to retry the lookup. If [a, c) gets 169 // split again into [a, b) and [b, c), we don't want to the requests on [a, 170 // b) to be coalesced with the retried requests on [c, e). To distinguish the 171 // two cases, we can use the generation of the previous descriptor. 172 if prevDesc != nil { 173 ret.WriteString(":") 174 ret.WriteString(strconv.FormatInt(prevDesc.Generation, 10)) 175 } 176 return ret.String() 177 } 178 179 // NewRangeDescriptorCache returns a new RangeDescriptorCache which 180 // uses the given RangeDescriptorDB as the underlying source of range 181 // descriptors. 182 func NewRangeDescriptorCache( 183 st *cluster.Settings, db RangeDescriptorDB, size func() int64, stopper *stop.Stopper, 184 ) *RangeDescriptorCache { 185 rdc := &RangeDescriptorCache{st: st, db: db, stopper: stopper} 186 rdc.rangeCache.cache = cache.NewOrderedCache(cache.Config{ 187 Policy: cache.CacheLRU, 188 ShouldEvict: func(n int, _, _ interface{}) bool { 189 return int64(n) > size() 190 }, 191 }) 192 return rdc 193 } 194 195 func (rdc *RangeDescriptorCache) String() string { 196 rdc.rangeCache.RLock() 197 defer rdc.rangeCache.RUnlock() 198 return rdc.stringLocked() 199 } 200 201 func (rdc *RangeDescriptorCache) stringLocked() string { 202 var buf strings.Builder 203 rdc.rangeCache.cache.Do(func(k, v interface{}) bool { 204 fmt.Fprintf(&buf, "key=%s desc=%+v\n", roachpb.Key(k.(rangeCacheKey)), v) 205 return false 206 }) 207 return buf.String() 208 } 209 210 // EvictionToken holds eviction state between calls to LookupRangeDescriptor. 211 type EvictionToken struct { 212 // rdc is the cache that produced this token - and that will be modified by 213 // Evict(). 214 rdc *RangeDescriptorCache 215 216 // desc is the descriptor that this EvictionToken refers to - the descriptor 217 // that Evict() will evict from rdc. 218 desc *roachpb.RangeDescriptor 219 // nextDesc, if not nil, is the descriptor that should replace desc if desc 220 // proves to be stale - i.e. nextDesc is inserted in the cache automatically 221 // by Evict(). This is used when the range descriptor lookup that populated 222 // the cache returned an intent in addition to the current descriptor value. 223 nextDesc *roachpb.RangeDescriptor 224 225 evictOnce sync.Once // assures that desc is only evicted once 226 } 227 228 func (rdc *RangeDescriptorCache) makeEvictionToken( 229 desc *roachpb.RangeDescriptor, nextDesc *roachpb.RangeDescriptor, 230 ) *EvictionToken { 231 return &EvictionToken{ 232 rdc: rdc, 233 desc: desc, 234 nextDesc: nextDesc, 235 } 236 } 237 238 // Evict instructs the EvictionToken to evict the RangeDescriptor it was created 239 // with from the RangeDescriptorCache. 240 func (et *EvictionToken) Evict(ctx context.Context) { 241 et.EvictAndReplace(ctx) 242 } 243 244 // EvictAndReplace instructs the EvictionToken to evict the RangeDescriptor it was 245 // created with from the RangeDescriptorCache. It also allows the user to provide 246 // new RangeDescriptors to insert into the cache, all atomically. When called without 247 // arguments, EvictAndReplace will behave the same as Evict. 248 func (et *EvictionToken) EvictAndReplace(ctx context.Context, newDescs ...roachpb.RangeDescriptor) { 249 et.evictOnce.Do(func() { 250 et.rdc.rangeCache.Lock() 251 defer et.rdc.rangeCache.Unlock() 252 et.rdc.evictCachedRangeDescriptorLocked(ctx, et.desc) 253 if len(newDescs) > 0 { 254 log.Eventf(ctx, "evicting cached range descriptor with %d replacements", len(newDescs)) 255 et.rdc.insertRangeDescriptorsLocked(ctx, newDescs...) 256 } else if et.nextDesc != nil { 257 log.Eventf(ctx, "evicting cached range descriptor with replacement from token") 258 et.rdc.insertRangeDescriptorsLocked(ctx, *et.nextDesc) 259 } else { 260 log.Eventf(ctx, "evicting cached range descriptor") 261 } 262 }) 263 } 264 265 // LookupRangeDescriptorWithEvictionToken attempts to locate a descriptor for the range 266 // containing the given Key. This is done by first trying the cache, and then 267 // querying the two-level lookup table of range descriptors which cockroach 268 // maintains. The function should be provided with an EvictionToken if one was 269 // acquired from this function on a previous lookup. If not, an empty 270 // EvictionToken can be provided. 271 // 272 // This method first looks up the specified key in the first level of 273 // range metadata, which returns the location of the key within the 274 // second level of range metadata. This second level location is then 275 // queried to retrieve a descriptor for the range where the key's 276 // value resides. Range descriptors retrieved during each search are 277 // cached for subsequent lookups. 278 // 279 // This method returns the RangeDescriptor for the range containing 280 // the key's data and a token to manage evicting the RangeDescriptor 281 // if it is found to be stale, or an error if any occurred. 282 func (rdc *RangeDescriptorCache) LookupRangeDescriptorWithEvictionToken( 283 ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool, 284 ) (*roachpb.RangeDescriptor, *EvictionToken, error) { 285 return rdc.lookupRangeDescriptorInternal(ctx, key, evictToken, useReverseScan) 286 } 287 288 // LookupRangeDescriptor presents a simpler interface for looking up a 289 // RangeDescriptor for a key without the eviction tokens or scan direction 290 // control of LookupRangeDescriptorWithEvictionToken. This method is exported 291 // to lower level clients through the kvbase.RangeDescriptorCache interface. 292 func (rdc *RangeDescriptorCache) LookupRangeDescriptor( 293 ctx context.Context, key roachpb.RKey, 294 ) (*roachpb.RangeDescriptor, error) { 295 rd, _, err := rdc.lookupRangeDescriptorInternal(ctx, key, nil, false) 296 return rd, err 297 } 298 299 // lookupRangeDescriptorInternal is called from LookupRangeDescriptor or from tests. 300 // 301 // If a WaitGroup is supplied, it is signaled when the request is 302 // added to the inflight request map (with or without merging) or the 303 // function finishes. Used for testing. 304 func (rdc *RangeDescriptorCache) lookupRangeDescriptorInternal( 305 ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool, 306 ) (*roachpb.RangeDescriptor, *EvictionToken, error) { 307 // Retry while we're hitting lookupCoalescingErrors. 308 for { 309 desc, newToken, err := rdc.tryLookupRangeDescriptor(ctx, key, evictToken, useReverseScan) 310 if errors.HasType(err, (lookupCoalescingError{})) { 311 log.VEventf(ctx, 2, "bad lookup coalescing; retrying: %s", err) 312 continue 313 } 314 if err != nil { 315 return nil, nil, err 316 } 317 return desc, newToken, nil 318 } 319 } 320 321 // lookupCoalescingError is returned by tryLookupRangeDescriptor() when the 322 // descriptor database lookup failed because this request was grouped with 323 // another request for another key, and the grouping proved bad since that other 324 // request returned a descriptor that doesn't cover our request. The lookup 325 // should be retried. 326 type lookupCoalescingError struct { 327 // key is the key whose range was being looked-up. 328 key roachpb.RKey 329 wrongDesc *roachpb.RangeDescriptor 330 } 331 332 func (e lookupCoalescingError) Error() string { 333 return fmt.Sprintf("key %q not contained in range lookup's "+ 334 "resulting descriptor %v", e.key, e.wrongDesc) 335 } 336 337 func newLookupCoalescingError(key roachpb.RKey, wrongDesc *roachpb.RangeDescriptor) error { 338 return lookupCoalescingError{ 339 key: key, 340 wrongDesc: wrongDesc, 341 } 342 } 343 344 // tryLookupRangeDescriptor can return a lookupCoalescingError. 345 func (rdc *RangeDescriptorCache) tryLookupRangeDescriptor( 346 ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool, 347 ) (*roachpb.RangeDescriptor, *EvictionToken, error) { 348 rdc.rangeCache.RLock() 349 if desc, _ := rdc.getCachedRangeDescriptorLocked(key, useReverseScan); desc != nil { 350 rdc.rangeCache.RUnlock() 351 returnToken := rdc.makeEvictionToken(desc, nil /* nextDesc */) 352 return desc, returnToken, nil 353 } 354 355 if log.V(2) { 356 log.Infof(ctx, "lookup range descriptor: key=%s (reverse: %t)", key, useReverseScan) 357 } 358 359 var prevDesc *roachpb.RangeDescriptor 360 if evictToken != nil { 361 prevDesc = evictToken.desc 362 } 363 requestKey := makeLookupRequestKey(key, prevDesc, useReverseScan) 364 resC, leader := rdc.lookupRequests.DoChan(requestKey, func() (interface{}, error) { 365 var lookupRes lookupResult 366 if err := rdc.stopper.RunTaskWithErr(ctx, "rangecache: range lookup", func(ctx context.Context) error { 367 ctx, reqSpan := tracing.ForkCtxSpan(ctx, "range lookup") 368 defer tracing.FinishSpan(reqSpan) 369 // Clear the context's cancelation. This request services potentially many 370 // callers waiting for its result, and using the flight's leader's 371 // cancelation doesn't make sense. 372 ctx = logtags.WithTags(context.Background(), logtags.FromContext(ctx)) 373 ctx = opentracing.ContextWithSpan(ctx, reqSpan) 374 375 // Since we don't inherit any other cancelation, let's put in a generous 376 // timeout as some protection against unavailable meta ranges. 377 var rs, preRs []roachpb.RangeDescriptor 378 if err := contextutil.RunWithTimeout(ctx, "range lookup", 10*time.Second, 379 func(ctx context.Context) error { 380 var err error 381 rs, preRs, err = rdc.performRangeLookup(ctx, key, useReverseScan) 382 return err 383 }); err != nil { 384 return err 385 } 386 387 switch len(rs) { 388 case 0: 389 return fmt.Errorf("no range descriptors returned for %s", key) 390 case 1: 391 desc := &rs[0] 392 lookupRes = lookupResult{ 393 desc: desc, 394 evictToken: rdc.makeEvictionToken(desc, nil /* nextDesc */), 395 } 396 case 2: 397 desc := &rs[0] 398 nextDesc := &rs[1] 399 lookupRes = lookupResult{ 400 desc: desc, 401 evictToken: rdc.makeEvictionToken(desc, nextDesc), 402 } 403 default: 404 panic(fmt.Sprintf("more than 2 matching range descriptors returned for %s: %v", key, rs)) 405 } 406 407 // We want to be assured that all goroutines which experienced a cache miss 408 // have joined our in-flight request, and all others will experience a 409 // cache hit. This requires atomicity across cache population and 410 // notification, hence this exclusive lock. 411 rdc.rangeCache.Lock() 412 defer rdc.rangeCache.Unlock() 413 414 // Insert the descriptor and the prefetched ones. We don't insert rs[1] 415 // (if any), since it overlaps with rs[0]; rs[1] will be handled by 416 // rs[0]'s eviction token. 417 rdc.insertRangeDescriptorsLocked(ctx, rs[0:1:1]... /* this is rs[0], avoiding an allocation */) 418 rdc.insertRangeDescriptorsLocked(ctx, preRs...) 419 return nil 420 }); err != nil { 421 return nil, err 422 } 423 return lookupRes, nil 424 }) 425 426 // We must use DoChan above so that we can always unlock this mutex. This must 427 // be done *after* the request has been added to the lookupRequests group, or 428 // we risk it racing with an inflight request. 429 rdc.rangeCache.RUnlock() 430 431 if !leader { 432 log.VEvent(ctx, 2, "coalesced range lookup request onto in-flight one") 433 if rdc.coalesced != nil { 434 rdc.coalesced <- struct{}{} 435 } 436 } 437 438 // Wait for the inflight request. 439 var res singleflight.Result 440 select { 441 case res = <-resC: 442 case <-ctx.Done(): 443 return nil, nil, errors.Wrap(ctx.Err(), "aborted during range descriptor lookup") 444 } 445 446 var s string 447 if res.Err != nil { 448 s = res.Err.Error() 449 } else { 450 s = res.Val.(lookupResult).desc.String() 451 } 452 if res.Shared { 453 log.Eventf(ctx, "looked up range descriptor with shared request: %s", s) 454 } else { 455 log.Eventf(ctx, "looked up range descriptor: %s", s) 456 } 457 if res.Err != nil { 458 return nil, nil, res.Err 459 } 460 461 // We might get a descriptor that doesn't contain the key we're looking for 462 // because of bad grouping of requests. For example, say we had a stale 463 // [a-z) in the cache who's info is passed into this function as evictToken. 464 // In the meantime the range has been split to [a-m),[m-z). A request for "a" 465 // will be coalesced with a request for "m" in the singleflight, above, but 466 // one of them will get a wrong results. We return an error that will trigger 467 // a retry at a higher level inside the cache. Note that the retry might find 468 // the descriptor it's looking for in the cache if it was pre-fetched by the 469 // original lookup. 470 lookupRes := res.Val.(lookupResult) 471 desc := lookupRes.desc 472 containsFn := (*roachpb.RangeDescriptor).ContainsKey 473 if useReverseScan { 474 containsFn = (*roachpb.RangeDescriptor).ContainsKeyInverted 475 } 476 if !containsFn(desc, key) { 477 return nil, nil, newLookupCoalescingError(key, desc) 478 } 479 return desc, lookupRes.evictToken, nil 480 } 481 482 // performRangeLookup handles delegating the range lookup to the cache's 483 // RangeDescriptorDB. 484 func (rdc *RangeDescriptorCache) performRangeLookup( 485 ctx context.Context, key roachpb.RKey, useReverseScan bool, 486 ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 487 // Tag inner operations. 488 ctx = logtags.AddTag(ctx, "range-lookup", key) 489 490 // In this case, the requested key is stored in the cluster's first 491 // range. Return the first range, which is always gossiped and not 492 // queried from the datastore. 493 if keys.RangeMetaKey(key).Equal(roachpb.RKeyMin) { 494 desc, err := rdc.db.FirstRange() 495 if err != nil { 496 return nil, nil, err 497 } 498 return []roachpb.RangeDescriptor{*desc}, nil, nil 499 } 500 501 return rdc.db.RangeLookup(ctx, key, useReverseScan) 502 } 503 504 // Clear clears all RangeDescriptors from the RangeDescriptorCache. 505 func (rdc *RangeDescriptorCache) Clear() { 506 rdc.rangeCache.Lock() 507 defer rdc.rangeCache.Unlock() 508 rdc.rangeCache.cache.Clear() 509 } 510 511 // EvictByKey evicts the descriptor containing the given key, if any. 512 // 513 // Returns true if a descriptor was evicted. 514 func (rdc *RangeDescriptorCache) EvictByKey(ctx context.Context, descKey roachpb.RKey) bool { 515 rdc.rangeCache.Lock() 516 defer rdc.rangeCache.Unlock() 517 518 cachedDesc, entry := rdc.getCachedRangeDescriptorLocked(descKey, false /* inverted */) 519 if cachedDesc == nil { 520 return false 521 } 522 log.VEventf(ctx, 2, "evict cached descriptor: %s", cachedDesc) 523 rdc.rangeCache.cache.DelEntry(entry) 524 return true 525 } 526 527 // evictCachedRangeDescriptorLocked evicts desc from the cache. If desc is not 528 // in the cache, it's a no-op. The caller needs to holds a write lock on 529 // rdc.rangeCache. 530 // 531 // Returns true if the descriptor was evicted from the cache. 532 func (rdc *RangeDescriptorCache) evictCachedRangeDescriptorLocked( 533 ctx context.Context, desc *roachpb.RangeDescriptor, 534 ) bool { 535 cachedDesc, entry := rdc.getCachedRangeDescriptorLocked(desc.StartKey, false /* inverted */) 536 if cachedDesc == nil { 537 return false 538 } 539 540 // Note that we're doing a "compare-and-erase": we want to clean the cache 541 // only if it equals the passed-in descriptor. We use Generation to determine 542 // if the range descriptors are equal. If the range descriptors are not equal, 543 // then likely some other caller already evicted previously, and we can save 544 // work by not doing it again (which would prompt another expensive lookup). 545 if desc.Generation != cachedDesc.Generation { 546 return false 547 } 548 549 log.VEventf(ctx, 2, "evict cached descriptor: desc=%s", cachedDesc) 550 rdc.rangeCache.cache.DelEntry(entry) 551 return true 552 } 553 554 // GetCachedRangeDescriptor retrieves the descriptor of the range which contains 555 // the given key. It returns nil if the descriptor is not found in the cache. 556 // 557 // `inverted` determines the behavior at the range boundary: If set to true 558 // and `key` is the EndKey and StartKey of two adjacent ranges, the first range 559 // is returned instead of the second (which technically contains the given key). 560 func (rdc *RangeDescriptorCache) GetCachedRangeDescriptor( 561 key roachpb.RKey, inverted bool, 562 ) *roachpb.RangeDescriptor { 563 rdc.rangeCache.RLock() 564 defer rdc.rangeCache.RUnlock() 565 desc, _ := rdc.getCachedRangeDescriptorLocked(key, inverted) 566 return desc 567 } 568 569 // getCachedRangeDescriptorLocked is like GetCachedRangeDescriptor, but it 570 // assumes that the caller holds a read lock on rdc.rangeCache. 571 // 572 // In addition to GetCachedRangeDescriptor, it also returns an internal cache 573 // Entry that can be used for descriptor eviction. 574 func (rdc *RangeDescriptorCache) getCachedRangeDescriptorLocked( 575 key roachpb.RKey, inverted bool, 576 ) (*roachpb.RangeDescriptor, *cache.Entry) { 577 // The cache is indexed using the end-key of the range, but the 578 // end-key is non-inverted by default. 579 var metaKey roachpb.RKey 580 if !inverted { 581 metaKey = keys.RangeMetaKey(key.Next()) 582 } else { 583 metaKey = keys.RangeMetaKey(key) 584 } 585 586 entry, ok := rdc.rangeCache.cache.CeilEntry(rangeCacheKey(metaKey)) 587 if !ok { 588 return nil, nil 589 } 590 desc := entry.Value.(*roachpb.RangeDescriptor) 591 592 containsFn := (*roachpb.RangeDescriptor).ContainsKey 593 if inverted { 594 containsFn = (*roachpb.RangeDescriptor).ContainsKeyInverted 595 } 596 597 // Return nil if the key does not belong to the range. 598 if !containsFn(desc, key) { 599 return nil, nil 600 } 601 return desc, entry 602 } 603 604 // InsertRangeDescriptors inserts the provided descriptors in the cache. 605 // This is a no-op for the descriptors that are already present in the cache. 606 func (rdc *RangeDescriptorCache) InsertRangeDescriptors( 607 ctx context.Context, rs ...roachpb.RangeDescriptor, 608 ) { 609 rdc.rangeCache.Lock() 610 defer rdc.rangeCache.Unlock() 611 rdc.insertRangeDescriptorsLocked(ctx, rs...) 612 } 613 614 // insertRangeDescriptorsLocked is like InsertRangeDescriptors, but it assumes 615 // that the caller holds a write lock on rdc.rangeCache. 616 func (rdc *RangeDescriptorCache) insertRangeDescriptorsLocked( 617 ctx context.Context, rs ...roachpb.RangeDescriptor, 618 ) { 619 for i := range rs { 620 if !rs[i].IsInitialized() { 621 panic(fmt.Sprintf("inserting uninitialized desc: %s", rs[i])) 622 } 623 // Note: we append the end key of each range to meta records 624 // so that calls to rdc.rangeCache.cache.Ceil() for a key will return 625 // the correct range. 626 627 // Before adding a new descriptor, make sure we clear out any 628 // pre-existing, overlapping descriptor which might have been 629 // re-inserted due to concurrent range lookups. 630 ok := rdc.clearOlderOverlapping(ctx, &rs[i]) 631 if !ok { 632 // The descriptor is already in the cache, or is stale. 633 continue 634 } 635 rangeKey := keys.RangeMetaKey(rs[i].EndKey) 636 if log.V(2) { 637 log.Infof(ctx, "adding descriptor: key=%s desc=%s", rangeKey, &rs[i]) 638 } 639 rdc.rangeCache.cache.Add(rangeCacheKey(rangeKey), &rs[i]) 640 } 641 } 642 643 // clearOlderOverlapping clears any stale cache entries which overlap the 644 // specified descriptor. Returns false if any any overlapping newer descriptor 645 // is found (or if the descriptor we're trying to insert is already in the 646 // cache). 647 // 648 // Note that even if false is returned, older descriptors are still cleared from 649 // the cache. 650 func (rdc *RangeDescriptorCache) clearOlderOverlapping( 651 ctx context.Context, desc *roachpb.RangeDescriptor, 652 ) bool { 653 startMeta := keys.RangeMetaKey(desc.StartKey) 654 endMeta := keys.RangeMetaKey(desc.EndKey) 655 var entriesToEvict []*cache.Entry 656 newest := true 657 658 // Try to clear the descriptor that covers the end key of desc, if any. For 659 // example, if we are inserting a [/Min, "m") descriptor, we should check if 660 // we should evict an existing [/Min, /Max) descriptor. 661 entry, ok := rdc.rangeCache.cache.CeilEntry(rangeCacheKey(endMeta)) 662 if ok { 663 cached := entry.Value.(*roachpb.RangeDescriptor) 664 // It might be possible that the range descriptor immediately following 665 // desc.EndKey does not contain desc.EndKey, so we explicitly check that it 666 // overlaps. For example, if we are inserting ["a", "c"), we don't want to 667 // check ["c", "d"). We do, however, want to check ["b", "c"), which is why 668 // the end key is inclusive. 669 if cached.StartKey.Less(desc.EndKey) && !cached.EndKey.Less(desc.EndKey) { 670 if desc.Generation <= cached.Generation { 671 // A newer descriptor already exists in cache. 672 newest = false 673 } 674 if newest { 675 entriesToEvict = append(entriesToEvict, entry) 676 } 677 } 678 } 679 680 // Try to clear any descriptors whose end key is contained by the descriptor 681 // we are inserting. We iterate from the range meta key after 682 // RangeMetaKey(desc.StartKey) to RangeMetaKey(desc.EndKey) to avoid clearing 683 // the descriptor that ends when desc starts. For example, if we are 684 // inserting ["b", "c"), we should not evict ["a", "b"). 685 // 686 // Descriptors could be cleared from the cache in the event of a merge or a 687 // lot of concurrency. For example, if ranges ["a", "b") and ["b", "c") are 688 // merged, we should clear both of these if we are inserting ["a", "c"). 689 rdc.rangeCache.cache.DoRangeEntry(func(e *cache.Entry) bool { 690 descriptor := e.Value.(*roachpb.RangeDescriptor) 691 // Check generations to see if we evict. 692 if desc.Generation <= descriptor.Generation { 693 newest = false 694 } else { 695 entriesToEvict = append(entriesToEvict, e) 696 } 697 return false 698 }, rangeCacheKey(startMeta.Next()), rangeCacheKey(endMeta)) 699 700 for _, e := range entriesToEvict { 701 if log.V(2) { 702 log.Infof(ctx, "clearing overlapping descriptor: key=%s desc=%s", 703 e.Key, e.Value.(*roachpb.RangeDescriptor)) 704 } 705 rdc.rangeCache.cache.DelEntry(e) 706 } 707 return newest 708 }