github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/range_cache.go

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvcoord
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/biogo/store/llrb"
    23  	"github.com/cockroachdb/cockroach/pkg/keys"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvbase"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    27  	"github.com/cockroachdb/cockroach/pkg/util/cache"
    28  	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
    29  	"github.com/cockroachdb/cockroach/pkg/util/log"
    30  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    31  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    32  	"github.com/cockroachdb/cockroach/pkg/util/syncutil/singleflight"
    33  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    34  	"github.com/cockroachdb/errors"
    35  	"github.com/cockroachdb/logtags"
    36  	"github.com/opentracing/opentracing-go"
    37  )
    38  
    39  // rangeCacheKey is the key type used to store and sort values in the
    40  // RangeCache.
    41  type rangeCacheKey roachpb.RKey
    42  
    43  func (a rangeCacheKey) String() string {
    44  	return roachpb.Key(a).String()
    45  }
    46  
    47  // Compare implements the llrb.Comparable interface for rangeCacheKey, so that
    48  // it can be used as a key for cache.OrderedCache.
    49  func (a rangeCacheKey) Compare(b llrb.Comparable) int {
    50  	return bytes.Compare(a, b.(rangeCacheKey))
    51  }
    52  
    53  // RangeDescriptorDB is a type which can query range descriptors from an
    54  // underlying datastore. This interface is used by RangeDescriptorCache to
    55  // initially retrieve information which will be cached.
    56  type RangeDescriptorDB interface {
    57  	// RangeLookup takes a key to look up descriptors for. Two slices of range
    58  	// descriptors are returned. The first of these slices holds descriptors
    59  	// whose [startKey,endKey) spans contain the given key (possibly from
    60  	// intents), and the second holds prefetched adjacent descriptors.
    61  	RangeLookup(
    62  		ctx context.Context, key roachpb.RKey, useReverseScan bool,
    63  	) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error)
    64  
    65  	// FirstRange returns the descriptor for the first Range. This is the
    66  	// Range containing all meta1 entries.
    67  	FirstRange() (*roachpb.RangeDescriptor, error)
    68  }
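
        // What follows is an illustrative sketch, not part of the original file: a
        // minimal in-memory RangeDescriptorDB of the kind a test might use to feed
        // the cache. The staticRangeDescriptorDB name, its slice layout, and the
        // absence of prefetching are assumptions made purely for illustration.
        type staticRangeDescriptorDB struct {
            // descs must be non-overlapping and must cover every key that will be
            // looked up.
            descs []roachpb.RangeDescriptor
        }

        // RangeLookup returns the single descriptor containing key. This sketch
        // ignores useReverseScan and never prefetches adjacent descriptors.
        func (db *staticRangeDescriptorDB) RangeLookup(
            ctx context.Context, key roachpb.RKey, useReverseScan bool,
        ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
            for i := range db.descs {
                if db.descs[i].ContainsKey(key) {
                    return db.descs[i : i+1], nil, nil
                }
            }
            return nil, nil, fmt.Errorf("no descriptor found for key %s", key)
        }

        // FirstRange returns the first configured descriptor.
        func (db *staticRangeDescriptorDB) FirstRange() (*roachpb.RangeDescriptor, error) {
            if len(db.descs) == 0 {
                return nil, fmt.Errorf("no ranges configured")
            }
            return &db.descs[0], nil
        }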
    69  
    70  // RangeDescriptorCache is used to retrieve range descriptors for
    71  // arbitrary keys. Descriptors are initially queried from storage
    72  // using a RangeDescriptorDB, but are cached for subsequent lookups.
    73  type RangeDescriptorCache struct {
    74  	st      *cluster.Settings
    75  	stopper *stop.Stopper
    76  	// RangeDescriptorDB is used to retrieve range descriptors from the
    77  	// database, which will be cached by this structure.
    78  	db RangeDescriptorDB
    79  	// rangeCache caches replica metadata for key ranges. The cache is
    80  	// filled while servicing read and write requests to the key value
    81  	// store.
    82  	rangeCache struct {
    83  		syncutil.RWMutex
    84  		cache *cache.OrderedCache
    85  	}
    86  	// lookupRequests stores all inflight requests retrieving range
    87  	// descriptors from the database. It allows multiple RangeDescriptorDB
    88  	// lookup requests for the same inferred range descriptor to be
    89  	// multiplexed onto the same database lookup. See makeLookupRequestKey
    90  	// for details on this inference.
    91  	lookupRequests singleflight.Group
    92  
    93  	// coalesced, if not nil, is sent on every time a request is coalesced onto
    94  	// another in-flight one. Used by tests to block until a lookup request is
    95  	// blocked on the single-flight querying the db.
    96  	coalesced chan struct{}
    97  }
    98  
    99  // RangeDescriptorCache implements the kvbase.RangeDescriptorCache interface.
   100  var _ kvbase.RangeDescriptorCache = (*RangeDescriptorCache)(nil)
   101  
   102  type lookupResult struct {
   103  	desc       *roachpb.RangeDescriptor
   104  	evictToken *EvictionToken
   105  }
   106  
   107  // makeLookupRequestKey constructs a key for the lookupRequest group with the
   108  // goal of mapping all requests which are inferred to be looking for the same
   109  // descriptor onto the same request key to establish request coalescing.
   110  //
   111  // If the key is part of a descriptor that we previously had cached (but the
   112  // cache entry is stale), we use that previous descriptor to coalesce all
   113  // requests for keys within it into a single request. Namely, there are three
   114  // possible events that may have happened causing our cache to be stale. For
   115  // each of these, we try to coalesce all requests that will end up on the same
   116  // range post-event together.
   117  // - Split:  for a split, only the right half of the split will attempt to evict
   118  //           the stale descriptor because only the right half will be sending to
   119  //           the wrong range. Once this stale descriptor is evicted, keys from
   120  //           both halves of the split will miss the cache. Because both sides of
   121  //           the split will now map to the same lookupResult, it is important to
   122  //           use EvictAndReplace if possible to insert one of the two new descriptors.
   123  //           This way, no requests to that descriptor will ever miss the cache and
   124  //           risk being coalesced into the other request. If this is not possible,
   125  //           the lookup will still work, but it will require multiple lookups, which
   126  //           will be launched in series when requests find that their desired key
   127  //           is outside of the returned descriptor.
   128  // - Merges: for a merge, the left half of the merge will never notice. The right
   129  //           half of the merge will suddenly find its descriptor to be stale, so
   130  //           it will evict and lookup the new descriptor. We set the key to hash
   131  //           to the start of the stale descriptor for lookup requests to the right
   132  //           half of the merge so that all requests will be coalesced to the same
   133  //           lookupRequest.
   134  // - Rebal:  for a rebalance, the entire descriptor will suddenly go stale and
   135  //           requests to it will evict the descriptor. We set the key to hash to
   136  //           the start of the stale descriptor for lookup requests to the rebalanced
   137  //           descriptor so that all requests will be coalesced to the same lookupRequest.
   138  //
   139  // Note that the above description assumes that useReverseScan is false for simplicity.
   140  // If useReverseScan is true, we need to use the end key of the stale descriptor instead.
   141  func makeLookupRequestKey(
   142  	key roachpb.RKey, prevDesc *roachpb.RangeDescriptor, useReverseScan bool,
   143  ) string {
   144  	var ret strings.Builder
   145  	// We only want meta1, meta2, user range lookups to be coalesced with other
   146  	// meta1, meta2, user range lookups, respectively. Otherwise, deadlocks could
   147  	// happen due to singleflight. If the range lookup is in a meta range, we
   148  	// prefix the request key with the corresponding meta prefix to disambiguate
   149  	// the different lookups.
   150  	if key.AsRawKey().Compare(keys.Meta1KeyMax) < 0 {
   151  		ret.Write(keys.Meta1Prefix)
   152  	} else if key.AsRawKey().Compare(keys.Meta2KeyMax) < 0 {
   153  		ret.Write(keys.Meta2Prefix)
   154  	}
   155  	if prevDesc != nil {
   156  		if useReverseScan {
   157  			key = prevDesc.EndKey
   158  		} else {
   159  			key = prevDesc.StartKey
   160  		}
   161  	}
   162  	ret.Write(key)
   163  	ret.WriteString(":")
   164  	ret.WriteString(strconv.FormatBool(useReverseScan))
   165  	// Add the generation of the previous descriptor to the lookup request key to
   166  	// decrease the number of lookups in the rare double split case. Suppose we
   167  	// have a range [a, e) that gets split into [a, c) and [c, e). The requests
   168  	// on [c, e) will fail and will have to retry the lookup. If [a, c) gets
   169  // split again into [a, b) and [b, c), we don't want the requests on [a,
   170  	// b) to be coalesced with the retried requests on [c, e). To distinguish the
   171  	// two cases, we can use the generation of the previous descriptor.
   172  	if prevDesc != nil {
   173  		ret.WriteString(":")
   174  		ret.WriteString(strconv.FormatInt(prevDesc.Generation, 10))
   175  	}
   176  	return ret.String()
   177  }
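
        // Illustrative sketch, not part of the original file: with a stale cached
        // descriptor for [a, e) at generation 2, lookups for any key inside that
        // span produce the same request key and are therefore coalesced onto a
        // single database lookup. The keys and generation are assumptions chosen
        // for the example.
        func exampleLookupRequestKeyCoalescing() {
            stale := &roachpb.RangeDescriptor{
                StartKey:   roachpb.RKey("a"),
                EndKey:     roachpb.RKey("e"),
                Generation: 2,
            }
            k1 := makeLookupRequestKey(roachpb.RKey("b"), stale, false /* useReverseScan */)
            k2 := makeLookupRequestKey(roachpb.RKey("d"), stale, false /* useReverseScan */)
            fmt.Println(k1 == k2) // true: both requests join the same singleflight
        }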
   178  
   179  // NewRangeDescriptorCache returns a new RangeDescriptorCache which
   180  // uses the given RangeDescriptorDB as the underlying source of range
   181  // descriptors.
   182  func NewRangeDescriptorCache(
   183  	st *cluster.Settings, db RangeDescriptorDB, size func() int64, stopper *stop.Stopper,
   184  ) *RangeDescriptorCache {
   185  	rdc := &RangeDescriptorCache{st: st, db: db, stopper: stopper}
   186  	rdc.rangeCache.cache = cache.NewOrderedCache(cache.Config{
   187  		Policy: cache.CacheLRU,
   188  		ShouldEvict: func(n int, _, _ interface{}) bool {
   189  			return int64(n) > size()
   190  		},
   191  	})
   192  	return rdc
   193  }
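
        // Illustrative construction sketch, not part of the original file. The use
        // of cluster.MakeTestingClusterSettings, the fixed entry limit, and the db
        // argument (e.g. the staticRangeDescriptorDB sketch above) are assumptions;
        // production code wires these up from the server configuration.
        func exampleNewRangeDescriptorCache(db RangeDescriptorDB) *RangeDescriptorCache {
            st := cluster.MakeTestingClusterSettings()
            // The caller is responsible for eventually calling stopper.Stop(ctx).
            stopper := stop.NewStopper()
            // Cap the cache at roughly one million entries; the ShouldEvict policy
            // above consults this closure on every insertion.
            return NewRangeDescriptorCache(st, db, func() int64 { return 1 << 20 }, stopper)
        }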
   194  
   195  func (rdc *RangeDescriptorCache) String() string {
   196  	rdc.rangeCache.RLock()
   197  	defer rdc.rangeCache.RUnlock()
   198  	return rdc.stringLocked()
   199  }
   200  
   201  func (rdc *RangeDescriptorCache) stringLocked() string {
   202  	var buf strings.Builder
   203  	rdc.rangeCache.cache.Do(func(k, v interface{}) bool {
   204  		fmt.Fprintf(&buf, "key=%s desc=%+v\n", roachpb.Key(k.(rangeCacheKey)), v)
   205  		return false
   206  	})
   207  	return buf.String()
   208  }
   209  
   210  // EvictionToken holds eviction state between calls to LookupRangeDescriptor.
   211  type EvictionToken struct {
   212  	// rdc is the cache that produced this token - and that will be modified by
   213  	// Evict().
   214  	rdc *RangeDescriptorCache
   215  
   216  	// desc is the descriptor that this EvictionToken refers to - the descriptor
   217  	// that Evict() will evict from rdc.
   218  	desc *roachpb.RangeDescriptor
   219  	// nextDesc, if not nil, is the descriptor that should replace desc if desc
   220  	// proves to be stale - i.e. nextDesc is inserted in the cache automatically
   221  	// by Evict(). This is used when the range descriptor lookup that populated
   222  	// the cache returned an intent in addition to the current descriptor value.
   223  	nextDesc *roachpb.RangeDescriptor
   224  
   225  	evictOnce sync.Once // assures that desc is only evicted once
   226  }
   227  
   228  func (rdc *RangeDescriptorCache) makeEvictionToken(
   229  	desc *roachpb.RangeDescriptor, nextDesc *roachpb.RangeDescriptor,
   230  ) *EvictionToken {
   231  	return &EvictionToken{
   232  		rdc:      rdc,
   233  		desc:     desc,
   234  		nextDesc: nextDesc,
   235  	}
   236  }
   237  
   238  // Evict instructs the EvictionToken to evict the RangeDescriptor it was created
   239  // with from the RangeDescriptorCache.
   240  func (et *EvictionToken) Evict(ctx context.Context) {
   241  	et.EvictAndReplace(ctx)
   242  }
   243  
   244  // EvictAndReplace instructs the EvictionToken to evict the RangeDescriptor it was
   245  // created with from the RangeDescriptorCache. It also allows the user to provide
   246  // new RangeDescriptors to insert into the cache, all atomically. When called without
   247  // arguments, EvictAndReplace will behave the same as Evict.
   248  func (et *EvictionToken) EvictAndReplace(ctx context.Context, newDescs ...roachpb.RangeDescriptor) {
   249  	et.evictOnce.Do(func() {
   250  		et.rdc.rangeCache.Lock()
   251  		defer et.rdc.rangeCache.Unlock()
   252  		et.rdc.evictCachedRangeDescriptorLocked(ctx, et.desc)
   253  		if len(newDescs) > 0 {
   254  			log.Eventf(ctx, "evicting cached range descriptor with %d replacements", len(newDescs))
   255  			et.rdc.insertRangeDescriptorsLocked(ctx, newDescs...)
   256  		} else if et.nextDesc != nil {
   257  			log.Eventf(ctx, "evicting cached range descriptor with replacement from token")
   258  			et.rdc.insertRangeDescriptorsLocked(ctx, *et.nextDesc)
   259  		} else {
   260  			log.Eventf(ctx, "evicting cached range descriptor")
   261  		}
   262  	})
   263  }
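
        // Illustrative sketch, not part of the original file: when a caller learns
        // (e.g. from a range-key-mismatch response) that the descriptor its token
        // refers to has been split, it can evict the stale entry and install both
        // post-split halves in one atomic step, so that lookups for either half hit
        // the cache. The descriptor arguments are assumed to come from the caller.
        func exampleHandleStaleDescriptorAfterSplit(
            ctx context.Context, tok *EvictionToken, leftHalf, rightHalf roachpb.RangeDescriptor,
        ) {
            tok.EvictAndReplace(ctx, leftHalf, rightHalf)
        }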
   264  
   265  // LookupRangeDescriptorWithEvictionToken attempts to locate a descriptor for the range
   266  // containing the given Key. This is done by first trying the cache, and then
   267  // querying the two-level lookup table of range descriptors which cockroach
   268  // maintains. The function should be provided with an EvictionToken if one was
   269  // acquired from this function on a previous lookup. If not, a nil
   270  // EvictionToken can be provided.
   271  //
   272  // This method first looks up the specified key in the first level of
   273  // range metadata, which returns the location of the key within the
   274  // second level of range metadata. This second level location is then
   275  // queried to retrieve a descriptor for the range where the key's
   276  // value resides. Range descriptors retrieved during each search are
   277  // cached for subsequent lookups.
   278  //
   279  // This method returns the RangeDescriptor for the range containing
   280  // the key's data and a token to manage evicting the RangeDescriptor
   281  // if it is found to be stale, or an error if any occurred.
   282  func (rdc *RangeDescriptorCache) LookupRangeDescriptorWithEvictionToken(
   283  	ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool,
   284  ) (*roachpb.RangeDescriptor, *EvictionToken, error) {
   285  	return rdc.lookupRangeDescriptorInternal(ctx, key, evictToken, useReverseScan)
   286  }
   287  
   288  // LookupRangeDescriptor presents a simpler interface for looking up a
   289  // RangeDescriptor for a key without the eviction tokens or scan direction
   290  // control of LookupRangeDescriptorWithEvictionToken. This method is exported
   291  // to lower level clients through the kvbase.RangeDescriptorCache interface.
   292  func (rdc *RangeDescriptorCache) LookupRangeDescriptor(
   293  	ctx context.Context, key roachpb.RKey,
   294  ) (*roachpb.RangeDescriptor, error) {
   295  	rd, _, err := rdc.lookupRangeDescriptorInternal(ctx, key, nil, false)
   296  	return rd, err
   297  }
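
        // Illustrative sketch, not part of the original file, of how a caller might
        // drive the lookup/evict cycle. The send callback and its boolean "stale"
        // result are hypothetical stand-ins for the caller's RPC logic.
        func exampleLookupEvictRetry(
            ctx context.Context,
            rdc *RangeDescriptorCache,
            key roachpb.RKey,
            send func(context.Context, *roachpb.RangeDescriptor) (stale bool),
        ) error {
            var evictToken *EvictionToken
            for {
                desc, tok, err := rdc.LookupRangeDescriptorWithEvictionToken(
                    ctx, key, evictToken, false /* useReverseScan */)
                if err != nil {
                    return err
                }
                if send(ctx, desc) {
                    // The descriptor proved stale: evict it and retry. Passing the
                    // token to the next lookup lets requests for keys in the stale
                    // span coalesce onto a single database lookup.
                    tok.Evict(ctx)
                    evictToken = tok
                    continue
                }
                return nil
            }
        }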
   298  
   299  // lookupRangeDescriptorInternal is called from LookupRangeDescriptor,
   300  // LookupRangeDescriptorWithEvictionToken, or from tests.
   301  //
   302  // It retries the lookup whenever tryLookupRangeDescriptor returns a
   303  // lookupCoalescingError, i.e. when bad coalescing produced a wrong descriptor.
   304  func (rdc *RangeDescriptorCache) lookupRangeDescriptorInternal(
   305  	ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool,
   306  ) (*roachpb.RangeDescriptor, *EvictionToken, error) {
   307  	// Retry while we're hitting lookupCoalescingErrors.
   308  	for {
   309  		desc, newToken, err := rdc.tryLookupRangeDescriptor(ctx, key, evictToken, useReverseScan)
   310  		if errors.HasType(err, (lookupCoalescingError{})) {
   311  			log.VEventf(ctx, 2, "bad lookup coalescing; retrying: %s", err)
   312  			continue
   313  		}
   314  		if err != nil {
   315  			return nil, nil, err
   316  		}
   317  		return desc, newToken, nil
   318  	}
   319  }
   320  
   321  // lookupCoalescingError is returned by tryLookupRangeDescriptor() when the
   322  // descriptor database lookup failed because this request was grouped with
   323  // another request for another key, and the grouping proved bad since that other
   324  // request returned a descriptor that doesn't cover our request. The lookup
   325  // should be retried.
   326  type lookupCoalescingError struct {
   327  	// key is the key whose range was being looked-up.
   328  	key       roachpb.RKey
   329  	wrongDesc *roachpb.RangeDescriptor
   330  }
   331  
   332  func (e lookupCoalescingError) Error() string {
   333  	return fmt.Sprintf("key %q not contained in range lookup's "+
   334  		"resulting descriptor %v", e.key, e.wrongDesc)
   335  }
   336  
   337  func newLookupCoalescingError(key roachpb.RKey, wrongDesc *roachpb.RangeDescriptor) error {
   338  	return lookupCoalescingError{
   339  		key:       key,
   340  		wrongDesc: wrongDesc,
   341  	}
   342  }
   343  
   344  // tryLookupRangeDescriptor can return a lookupCoalescingError.
   345  func (rdc *RangeDescriptorCache) tryLookupRangeDescriptor(
   346  	ctx context.Context, key roachpb.RKey, evictToken *EvictionToken, useReverseScan bool,
   347  ) (*roachpb.RangeDescriptor, *EvictionToken, error) {
   348  	rdc.rangeCache.RLock()
   349  	if desc, _ := rdc.getCachedRangeDescriptorLocked(key, useReverseScan); desc != nil {
   350  		rdc.rangeCache.RUnlock()
   351  		returnToken := rdc.makeEvictionToken(desc, nil /* nextDesc */)
   352  		return desc, returnToken, nil
   353  	}
   354  
   355  	if log.V(2) {
   356  		log.Infof(ctx, "lookup range descriptor: key=%s (reverse: %t)", key, useReverseScan)
   357  	}
   358  
   359  	var prevDesc *roachpb.RangeDescriptor
   360  	if evictToken != nil {
   361  		prevDesc = evictToken.desc
   362  	}
   363  	requestKey := makeLookupRequestKey(key, prevDesc, useReverseScan)
   364  	resC, leader := rdc.lookupRequests.DoChan(requestKey, func() (interface{}, error) {
   365  		var lookupRes lookupResult
   366  		if err := rdc.stopper.RunTaskWithErr(ctx, "rangecache: range lookup", func(ctx context.Context) error {
   367  			ctx, reqSpan := tracing.ForkCtxSpan(ctx, "range lookup")
   368  			defer tracing.FinishSpan(reqSpan)
   369  			// Clear the context's cancelation. This request services potentially many
   370  			// callers waiting for its result, and using the flight's leader's
   371  			// cancelation doesn't make sense.
   372  			ctx = logtags.WithTags(context.Background(), logtags.FromContext(ctx))
   373  			ctx = opentracing.ContextWithSpan(ctx, reqSpan)
   374  
   375  			// Since we don't inherit any other cancelation, let's put in a generous
   376  			// timeout as some protection against unavailable meta ranges.
   377  			var rs, preRs []roachpb.RangeDescriptor
   378  			if err := contextutil.RunWithTimeout(ctx, "range lookup", 10*time.Second,
   379  				func(ctx context.Context) error {
   380  					var err error
   381  					rs, preRs, err = rdc.performRangeLookup(ctx, key, useReverseScan)
   382  					return err
   383  				}); err != nil {
   384  				return err
   385  			}
   386  
   387  			switch len(rs) {
   388  			case 0:
   389  				return fmt.Errorf("no range descriptors returned for %s", key)
   390  			case 1:
   391  				desc := &rs[0]
   392  				lookupRes = lookupResult{
   393  					desc:       desc,
   394  					evictToken: rdc.makeEvictionToken(desc, nil /* nextDesc */),
   395  				}
   396  			case 2:
   397  				desc := &rs[0]
   398  				nextDesc := &rs[1]
   399  				lookupRes = lookupResult{
   400  					desc:       desc,
   401  					evictToken: rdc.makeEvictionToken(desc, nextDesc),
   402  				}
   403  			default:
   404  				panic(fmt.Sprintf("more than 2 matching range descriptors returned for %s: %v", key, rs))
   405  			}
   406  
   407  			// We want to be assured that all goroutines which experienced a cache miss
   408  			// have joined our in-flight request, and all others will experience a
   409  			// cache hit. This requires atomicity across cache population and
   410  			// notification, hence this exclusive lock.
   411  			rdc.rangeCache.Lock()
   412  			defer rdc.rangeCache.Unlock()
   413  
   414  			// Insert the descriptor and the prefetched ones. We don't insert rs[1]
   415  			// (if any), since it overlaps with rs[0]; rs[1] will be handled by
   416  			// rs[0]'s eviction token.
   417  			rdc.insertRangeDescriptorsLocked(ctx, rs[0:1:1]... /* this is rs[0], avoiding an allocation */)
   418  			rdc.insertRangeDescriptorsLocked(ctx, preRs...)
   419  			return nil
   420  		}); err != nil {
   421  			return nil, err
   422  		}
   423  		return lookupRes, nil
   424  	})
   425  
   426  	// We must use DoChan above so that we can always unlock this mutex. This must
   427  	// be done *after* the request has been added to the lookupRequests group, or
   428  	// we risk it racing with an inflight request.
   429  	rdc.rangeCache.RUnlock()
   430  
   431  	if !leader {
   432  		log.VEvent(ctx, 2, "coalesced range lookup request onto in-flight one")
   433  		if rdc.coalesced != nil {
   434  			rdc.coalesced <- struct{}{}
   435  		}
   436  	}
   437  
   438  	// Wait for the inflight request.
   439  	var res singleflight.Result
   440  	select {
   441  	case res = <-resC:
   442  	case <-ctx.Done():
   443  		return nil, nil, errors.Wrap(ctx.Err(), "aborted during range descriptor lookup")
   444  	}
   445  
   446  	var s string
   447  	if res.Err != nil {
   448  		s = res.Err.Error()
   449  	} else {
   450  		s = res.Val.(lookupResult).desc.String()
   451  	}
   452  	if res.Shared {
   453  		log.Eventf(ctx, "looked up range descriptor with shared request: %s", s)
   454  	} else {
   455  		log.Eventf(ctx, "looked up range descriptor: %s", s)
   456  	}
   457  	if res.Err != nil {
   458  		return nil, nil, res.Err
   459  	}
   460  
   461  	// We might get a descriptor that doesn't contain the key we're looking for
   462  	// because of bad grouping of requests. For example, say we had a stale
   463  // [a-z) in the cache whose info is passed into this function as evictToken.
   464  	// In the meantime the range has been split to [a-m),[m-z). A request for "a"
   465  	// will be coalesced with a request for "m" in the singleflight, above, but
   466  // one of them will get the wrong result. We return an error that will trigger
   467  	// a retry at a higher level inside the cache. Note that the retry might find
   468  	// the descriptor it's looking for in the cache if it was pre-fetched by the
   469  	// original lookup.
   470  	lookupRes := res.Val.(lookupResult)
   471  	desc := lookupRes.desc
   472  	containsFn := (*roachpb.RangeDescriptor).ContainsKey
   473  	if useReverseScan {
   474  		containsFn = (*roachpb.RangeDescriptor).ContainsKeyInverted
   475  	}
   476  	if !containsFn(desc, key) {
   477  		return nil, nil, newLookupCoalescingError(key, desc)
   478  	}
   479  	return desc, lookupRes.evictToken, nil
   480  }
   481  
   482  // performRangeLookup handles delegating the range lookup to the cache's
   483  // RangeDescriptorDB.
   484  func (rdc *RangeDescriptorCache) performRangeLookup(
   485  	ctx context.Context, key roachpb.RKey, useReverseScan bool,
   486  ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
   487  	// Tag inner operations.
   488  	ctx = logtags.AddTag(ctx, "range-lookup", key)
   489  
   490  	// In this case, the requested key is stored in the cluster's first
   491  	// range. Return the first range, which is always gossiped and not
   492  	// queried from the datastore.
   493  	if keys.RangeMetaKey(key).Equal(roachpb.RKeyMin) {
   494  		desc, err := rdc.db.FirstRange()
   495  		if err != nil {
   496  			return nil, nil, err
   497  		}
   498  		return []roachpb.RangeDescriptor{*desc}, nil, nil
   499  	}
   500  
   501  	return rdc.db.RangeLookup(ctx, key, useReverseScan)
   502  }
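
        // Illustrative sketch, not part of the original file: RangeMetaKey collapses
        // meta1 keys down to RKeyMin, which is how the first-range special case above
        // is detected. The concrete keys are assumptions chosen for the example.
        func exampleFirstRangeAddressing() {
            meta1Key := roachpb.RKey(keys.Meta1Prefix)
            userKey := roachpb.RKey("a")
            fmt.Println(keys.RangeMetaKey(meta1Key).Equal(roachpb.RKeyMin)) // true: served by FirstRange()
            fmt.Println(keys.RangeMetaKey(userKey).Equal(roachpb.RKeyMin))  // false: served by RangeLookup()
        }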
   503  
   504  // Clear clears all RangeDescriptors from the RangeDescriptorCache.
   505  func (rdc *RangeDescriptorCache) Clear() {
   506  	rdc.rangeCache.Lock()
   507  	defer rdc.rangeCache.Unlock()
   508  	rdc.rangeCache.cache.Clear()
   509  }
   510  
   511  // EvictByKey evicts the descriptor containing the given key, if any.
   512  //
   513  // Returns true if a descriptor was evicted.
   514  func (rdc *RangeDescriptorCache) EvictByKey(ctx context.Context, descKey roachpb.RKey) bool {
   515  	rdc.rangeCache.Lock()
   516  	defer rdc.rangeCache.Unlock()
   517  
   518  	cachedDesc, entry := rdc.getCachedRangeDescriptorLocked(descKey, false /* inverted */)
   519  	if cachedDesc == nil {
   520  		return false
   521  	}
   522  	log.VEventf(ctx, 2, "evict cached descriptor: %s", cachedDesc)
   523  	rdc.rangeCache.cache.DelEntry(entry)
   524  	return true
   525  }
   526  
   527  // evictCachedRangeDescriptorLocked evicts desc from the cache. If desc is not
   528  // in the cache, it's a no-op. The caller needs to hold a write lock on
   529  // rdc.rangeCache.
   530  //
   531  // Returns true if the descriptor was evicted from the cache.
   532  func (rdc *RangeDescriptorCache) evictCachedRangeDescriptorLocked(
   533  	ctx context.Context, desc *roachpb.RangeDescriptor,
   534  ) bool {
   535  	cachedDesc, entry := rdc.getCachedRangeDescriptorLocked(desc.StartKey, false /* inverted */)
   536  	if cachedDesc == nil {
   537  		return false
   538  	}
   539  
   540  	// Note that we're doing a "compare-and-erase": we want to clean the cache
   541  	// only if it equals the passed-in descriptor. We use Generation to determine
   542  	// if the range descriptors are equal. If the range descriptors are not equal,
   543  	// then likely some other caller already evicted previously, and we can save
   544  	// work by not doing it again (which would prompt another expensive lookup).
   545  	if desc.Generation != cachedDesc.Generation {
   546  		return false
   547  	}
   548  
   549  	log.VEventf(ctx, 2, "evict cached descriptor: desc=%s", cachedDesc)
   550  	rdc.rangeCache.cache.DelEntry(entry)
   551  	return true
   552  }
   553  
   554  // GetCachedRangeDescriptor retrieves the descriptor of the range which contains
   555  // the given key. It returns nil if the descriptor is not found in the cache.
   556  //
   557  // `inverted` determines the behavior at the range boundary: If set to true
   558  // and `key` is the EndKey and StartKey of two adjacent ranges, the first range
   559  // is returned instead of the second (which technically contains the given key).
   560  func (rdc *RangeDescriptorCache) GetCachedRangeDescriptor(
   561  	key roachpb.RKey, inverted bool,
   562  ) *roachpb.RangeDescriptor {
   563  	rdc.rangeCache.RLock()
   564  	defer rdc.rangeCache.RUnlock()
   565  	desc, _ := rdc.getCachedRangeDescriptorLocked(key, inverted)
   566  	return desc
   567  }
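
        // Illustrative sketch, not part of the original file, of the boundary
        // behavior controlled by `inverted`. It assumes a cache with no conflicting
        // entries; the descriptors, keys, and generations are chosen for the example.
        func exampleInvertedBoundaryLookup(ctx context.Context, rdc *RangeDescriptorCache) {
            rdc.InsertRangeDescriptors(ctx,
                roachpb.RangeDescriptor{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("b"), Generation: 1},
                roachpb.RangeDescriptor{StartKey: roachpb.RKey("b"), EndKey: roachpb.RKey("c"), Generation: 1},
            )
            // "b" is the EndKey of ["a", "b") and the StartKey of ["b", "c").
            _ = rdc.GetCachedRangeDescriptor(roachpb.RKey("b"), false /* inverted */) // returns ["b", "c")
            _ = rdc.GetCachedRangeDescriptor(roachpb.RKey("b"), true /* inverted */)  // returns ["a", "b")
        }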
   568  
   569  // getCachedRangeDescriptorLocked is like GetCachedRangeDescriptor, but it
   570  // assumes that the caller holds a read lock on rdc.rangeCache.
   571  //
   572  // In addition to GetCachedRangeDescriptor, it also returns an internal cache
   573  // Entry that can be used for descriptor eviction.
   574  func (rdc *RangeDescriptorCache) getCachedRangeDescriptorLocked(
   575  	key roachpb.RKey, inverted bool,
   576  ) (*roachpb.RangeDescriptor, *cache.Entry) {
   577  	// The cache is indexed by RangeMetaKey(EndKey). For a non-inverted lookup,
   578  	// key.Next() ensures a key equal to a range's EndKey maps to the next range.
   579  	var metaKey roachpb.RKey
   580  	if !inverted {
   581  		metaKey = keys.RangeMetaKey(key.Next())
   582  	} else {
   583  		metaKey = keys.RangeMetaKey(key)
   584  	}
   585  
   586  	entry, ok := rdc.rangeCache.cache.CeilEntry(rangeCacheKey(metaKey))
   587  	if !ok {
   588  		return nil, nil
   589  	}
   590  	desc := entry.Value.(*roachpb.RangeDescriptor)
   591  
   592  	containsFn := (*roachpb.RangeDescriptor).ContainsKey
   593  	if inverted {
   594  		containsFn = (*roachpb.RangeDescriptor).ContainsKeyInverted
   595  	}
   596  
   597  	// Return nil if the key does not belong to the range.
   598  	if !containsFn(desc, key) {
   599  		return nil, nil
   600  	}
   601  	return desc, entry
   602  }
   603  
   604  // InsertRangeDescriptors inserts the provided descriptors in the cache.
   605  // This is a no-op for the descriptors that are already present in the cache.
   606  func (rdc *RangeDescriptorCache) InsertRangeDescriptors(
   607  	ctx context.Context, rs ...roachpb.RangeDescriptor,
   608  ) {
   609  	rdc.rangeCache.Lock()
   610  	defer rdc.rangeCache.Unlock()
   611  	rdc.insertRangeDescriptorsLocked(ctx, rs...)
   612  }
   613  
   614  // insertRangeDescriptorsLocked is like InsertRangeDescriptors, but it assumes
   615  // that the caller holds a write lock on rdc.rangeCache.
   616  func (rdc *RangeDescriptorCache) insertRangeDescriptorsLocked(
   617  	ctx context.Context, rs ...roachpb.RangeDescriptor,
   618  ) {
   619  	for i := range rs {
   620  		if !rs[i].IsInitialized() {
   621  			panic(fmt.Sprintf("inserting uninitialized desc: %s", rs[i]))
   622  		}
   623  		// Note: meta records are keyed by the end key of each range, so that
   624  		// calls to rdc.rangeCache.cache.CeilEntry() for a key will return the
   625  		// correct range.
   626  
   627  		// Before adding a new descriptor, make sure we clear out any
   628  		// pre-existing, overlapping descriptor which might have been
   629  		// re-inserted due to concurrent range lookups.
   630  		ok := rdc.clearOlderOverlapping(ctx, &rs[i])
   631  		if !ok {
   632  			// The descriptor is already in the cache, or is stale.
   633  			continue
   634  		}
   635  		rangeKey := keys.RangeMetaKey(rs[i].EndKey)
   636  		if log.V(2) {
   637  			log.Infof(ctx, "adding descriptor: key=%s desc=%s", rangeKey, &rs[i])
   638  		}
   639  		rdc.rangeCache.cache.Add(rangeCacheKey(rangeKey), &rs[i])
   640  	}
   641  }
   642  
   643  // clearOlderOverlapping clears any stale cache entries which overlap the
   644  // specified descriptor. Returns false if any overlapping newer descriptor
   645  // is found (or if the descriptor we're trying to insert is already in the
   646  // cache).
   647  //
   648  // Note that even if false is returned, older descriptors are still cleared from
   649  // the cache.
   650  func (rdc *RangeDescriptorCache) clearOlderOverlapping(
   651  	ctx context.Context, desc *roachpb.RangeDescriptor,
   652  ) bool {
   653  	startMeta := keys.RangeMetaKey(desc.StartKey)
   654  	endMeta := keys.RangeMetaKey(desc.EndKey)
   655  	var entriesToEvict []*cache.Entry
   656  	newest := true
   657  
   658  	// Try to clear the descriptor that covers the end key of desc, if any. For
   659  	// example, if we are inserting a [/Min, "m") descriptor, we should check if
   660  	// we should evict an existing [/Min, /Max) descriptor.
   661  	entry, ok := rdc.rangeCache.cache.CeilEntry(rangeCacheKey(endMeta))
   662  	if ok {
   663  		cached := entry.Value.(*roachpb.RangeDescriptor)
   664  		// It might be possible that the range descriptor immediately following
   665  		// desc.EndKey does not contain desc.EndKey, so we explicitly check that it
   666  		// overlaps. For example, if we are inserting ["a", "c"), we don't want to
   667  		// check ["c", "d"). We do, however, want to check ["b", "c"), which is why
   668  		// the end key is inclusive.
   669  		if cached.StartKey.Less(desc.EndKey) && !cached.EndKey.Less(desc.EndKey) {
   670  			if desc.Generation <= cached.Generation {
   671  				// A newer descriptor already exists in cache.
   672  				newest = false
   673  			}
   674  			if newest {
   675  				entriesToEvict = append(entriesToEvict, entry)
   676  			}
   677  		}
   678  	}
   679  
   680  	// Try to clear any descriptors whose end key is contained by the descriptor
   681  	// we are inserting. We iterate from the range meta key after
   682  	// RangeMetaKey(desc.StartKey) to RangeMetaKey(desc.EndKey) to avoid clearing
   683  	// the descriptor that ends when desc starts. For example, if we are
   684  	// inserting ["b", "c"), we should not evict ["a", "b").
   685  	//
   686  	// Descriptors could be cleared from the cache in the event of a merge or a
   687  	// lot of concurrency. For example, if ranges ["a", "b") and ["b", "c") are
   688  	// merged, we should clear both of these if we are inserting ["a", "c").
   689  	rdc.rangeCache.cache.DoRangeEntry(func(e *cache.Entry) bool {
   690  		descriptor := e.Value.(*roachpb.RangeDescriptor)
   691  		// Check generations to see if we evict.
   692  		if desc.Generation <= descriptor.Generation {
   693  			newest = false
   694  		} else {
   695  			entriesToEvict = append(entriesToEvict, e)
   696  		}
   697  		return false
   698  	}, rangeCacheKey(startMeta.Next()), rangeCacheKey(endMeta))
   699  
   700  	for _, e := range entriesToEvict {
   701  		if log.V(2) {
   702  			log.Infof(ctx, "clearing overlapping descriptor: key=%s desc=%s",
   703  				e.Key, e.Value.(*roachpb.RangeDescriptor))
   704  		}
   705  		rdc.rangeCache.cache.DelEntry(e)
   706  	}
   707  	return newest
   708  }
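
        // Illustrative sketch, not part of the original file, of the generation
        // comparison performed above. It assumes a cache with no other overlapping
        // entries; the spans and generation numbers are chosen for the example.
        func exampleGenerationBasedEviction(ctx context.Context, rdc *RangeDescriptorCache) {
            // Cache the pre-split descriptor [a, e) at generation 3.
            rdc.InsertRangeDescriptors(ctx, roachpb.RangeDescriptor{
                StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("e"), Generation: 3,
            })
            // Inserting the newer post-split half [a, c) at generation 5 clears the
            // overlapping, older [a, e) entry and is added to the cache.
            rdc.InsertRangeDescriptors(ctx, roachpb.RangeDescriptor{
                StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c"), Generation: 5,
            })
            // Re-inserting [a, c) at an older generation is a no-op: clearOlderOverlapping
            // finds the newer cached descriptor and returns false.
            rdc.InsertRangeDescriptors(ctx, roachpb.RangeDescriptor{
                StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c"), Generation: 2,
            })
        }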