gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/balancer/rls/internal/cache.go

/*
 *
 * Copyright 2021 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package rls

import (
	"container/list"
	"time"

	"gitee.com/ks-custle/core-gm/grpc/internal/backoff"
	internalgrpclog "gitee.com/ks-custle/core-gm/grpc/internal/grpclog"
	"gitee.com/ks-custle/core-gm/grpc/internal/grpcsync"
)

// TODO(easwars): Remove this once all RLS code is merged.
//lint:file-ignore U1000 Ignore all unused code, not all code is merged yet.

// cacheKey represents the key used to uniquely identify an entry in the data
// cache and in the pending requests map.
type cacheKey struct {
	// path is the full path of the incoming RPC request.
	path string
	// keys is a stringified version of the RLS request key map built using the
	// RLS keyBuilder. Since Go maps are not comparable, the request key map
	// cannot itself be part of the key for another map (entries in the data
	// cache and pending requests map are stored in maps).
	keys string
}
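
// For illustration, a cacheKey might look like the following (the values are
// hypothetical; the real stringified form is produced by the keyBuilder):
//
//	cacheKey{path: "/pkg.Service/Method", keys: "k1=v1,k2=v2"}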

// cacheEntry wraps all the data to be stored in a data cache entry.
type cacheEntry struct {
	// childPolicyWrappers contains the list of child policy wrappers
	// corresponding to the targets returned by the RLS server for this entry.
	childPolicyWrappers []*childPolicyWrapper
	// headerData is received in the RLS response and is to be sent in the
	// X-Google-RLS-Data header for matching RPCs.
	headerData string
	// expiryTime is the absolute time at which this cache entry stops being
	// valid. When an RLS request succeeds, this is set to the current time
	// plus the max_age field from the LB policy config.
	expiryTime time.Time
	// staleTime is the absolute time after which this cache entry will be
	// proactively refreshed if an incoming RPC matches this entry. When an RLS
	// request succeeds, this is set to the current time plus the stale_age from
	// the LB policy config.
	staleTime time.Time
	// earliestEvictTime is the absolute time before which this entry should not
	// be evicted from the cache. When a cache entry is created, this is set to
	// the current time plus a default value of 5 seconds. This is required to
	// make sure that a new entry added to the cache is not evicted before the
	// RLS response arrives (usually when the cache is too small).
	earliestEvictTime time.Time

	// status stores the RPC status of the previous RLS request for this
	// entry. Picks for entries with a non-nil value for this field are failed
	// with the error stored here.
	status error
	// backoffState contains all backoff related state. When an RLS request
	// succeeds, backoffState is reset. This state moves between the data cache
	// and the pending requests map.
	backoffState *backoffState
	// backoffTime is the absolute time at which the backoff period for this
	// entry ends. When an RLS request fails, this is set to the current time
	// plus the backoff value returned by the backoffState. The backoff timer
	// is also set up with this value. No new RLS requests are sent out for
	// this entry until the backoff period ends.
	//
	// Set to the zero time instant upon a successful RLS response.
	backoffTime time.Time
	// backoffExpiryTime is the absolute time at which an entry which has gone
	// through backoff stops being valid. When an RLS request fails, this is
	// set to the current time plus twice the backoff time. The cache expiry
	// timer will only delete entries for which both expiryTime and
	// backoffExpiryTime are in the past.
	//
	// Set to the zero time instant upon a successful RLS response.
	backoffExpiryTime time.Time

	// size stores the size of this cache entry. Used to enforce the cache size
	// specified in the LB policy configuration.
	size int64
	// onEvict is the callback to be invoked when this cache entry is evicted.
	onEvict func()
}

// backoffState wraps all backoff related state associated with a cache entry.
type backoffState struct {
	// retries keeps track of the number of RLS failures, to be able to
	// determine the amount of time to back off before the next attempt.
	retries int
	// bs is the exponential backoff implementation which returns the amount of
	// time to back off, given the number of retries.
	bs backoff.Strategy
	// timer fires when the backoff period ends and incoming requests after this
	// will trigger a new RLS request.
	timer *time.Timer
}
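
// Illustrative sketch of how a backoffState is consumed when an RLS request
// for a *cacheEntry fails, assuming backoff.Strategy's Backoff(retries)
// method; onBackoffDone is a hypothetical callback:
//
//	d := entry.backoffState.bs.Backoff(entry.backoffState.retries)
//	entry.backoffState.retries++
//	entry.backoffTime = time.Now().Add(d)
//	entry.backoffExpiryTime = time.Now().Add(2 * d)
//	entry.backoffState.timer = time.AfterFunc(d, onBackoffDone)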

// lru is a cache implementation with a least recently used eviction policy.
// Internally it uses a doubly linked list, with the least recently used element
// at the front of the list and the most recently used element at the back of
// the list. The value stored in this cache will be of type `cacheKey`.
//
// It is not safe for concurrent access.
type lru struct {
	ll *list.List

	// A map from the value stored in the lru to its underlying list element is
	// maintained to have a clean API. Without this, a subset of the lru's API
	// would accept/return cacheKey while another subset would accept/return
	// list elements.
	m map[cacheKey]*list.Element
}

// newLRU creates a new cache with a least recently used eviction policy.
func newLRU() *lru {
	return &lru{
		ll: list.New(),
		m:  make(map[cacheKey]*list.Element),
	}
}
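
// Minimal usage sketch (the key values are hypothetical):
//
//	c := newLRU()
//	k := cacheKey{path: "/pkg.Service/Method", keys: "k1=v1"}
//	c.addEntry(k)
//	c.makeRecent(k)              // k becomes the most recently used element
//	_ = c.getLeastRecentlyUsed() // front of the list; zero value if empty
//	c.removeEntry(k)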

func (l *lru) addEntry(key cacheKey) {
	e := l.ll.PushBack(key)
	l.m[key] = e
}

func (l *lru) makeRecent(key cacheKey) {
	e := l.m[key]
	l.ll.MoveToBack(e)
}

func (l *lru) removeEntry(key cacheKey) {
	e := l.m[key]
	l.ll.Remove(e)
	delete(l.m, key)
}

func (l *lru) getLeastRecentlyUsed() cacheKey {
	e := l.ll.Front()
	if e == nil {
		return cacheKey{}
	}
	return e.Value.(cacheKey)
}

// iterateAndRun traverses the lru in least-recently-used order and calls the
// provided function for every element.
//
// Callers may delete the cache entry associated with the cacheKey passed into
// f, but they may not perform any other operation which reorders the elements
// in the lru.
func (l *lru) iterateAndRun(f func(cacheKey)) {
	var next *list.Element
	for e := l.ll.Front(); e != nil; e = next {
		next = e.Next()
		f(e.Value.(cacheKey))
	}
}
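
// Deleting during traversal is safe because the next element is captured
// before f runs. Illustrative sketch (clearAll is a hypothetical helper):
//
//	func clearAll(l *lru) {
//		l.iterateAndRun(func(key cacheKey) {
//			l.removeEntry(key)
//		})
//	}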

// dataCache contains a cache of RLS data used by the LB policy to make routing
// decisions.
//
// The dataCache is keyed by the request's path and keys, represented by the
// `cacheKey` type. It maintains the cache keys in an `lru` and the cache data,
// represented by the `cacheEntry` type, in a native map.
//
// It is not safe for concurrent access.
type dataCache struct {
	maxSize     int64 // Maximum allowed size.
	currentSize int64 // Current size.
	keys        *lru  // Cache keys maintained in lru order.
	entries     map[cacheKey]*cacheEntry
	logger      *internalgrpclog.PrefixLogger
	shutdown    *grpcsync.Event
}

func newDataCache(size int64, logger *internalgrpclog.PrefixLogger) *dataCache {
	return &dataCache{
		maxSize:  size,
		keys:     newLRU(),
		entries:  make(map[cacheKey]*cacheEntry),
		logger:   logger,
		shutdown: grpcsync.NewEvent(),
	}
}
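
// Minimal usage sketch (the size, key, and entry values are hypothetical; the
// real LB policy derives them from its configuration and incoming RPCs):
//
//	dc := newDataCache(5<<20, logger) // 5MiB budget
//	key := cacheKey{path: "/pkg.Service/Method", keys: "k1=v1"}
//	if _, ok := dc.addEntry(key, &cacheEntry{size: 256}); ok {
//		entry := dc.getEntry(key) // also marks key as most recently used
//		_ = entry
//	}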

// resize changes the maximum allowed size of the data cache.
//
// The return value indicates if an entry with a valid backoff timer was
// evicted. This is important to the RLS LB policy which would send a new picker
// on the channel to re-process any RPCs queued as a result of this backoff
// timer.
func (dc *dataCache) resize(size int64) (backoffCancelled bool) {
	if dc.shutdown.HasFired() {
		return false
	}

	backoffCancelled = false
	for dc.currentSize > size {
		key := dc.keys.getLeastRecentlyUsed()
		entry, ok := dc.entries[key]
		if !ok {
			// This should never happen.
			dc.logger.Errorf("cacheKey %+v not found in the cache while attempting to resize it", key)
			break
		}

		// When we encounter a cache entry whose minimum expiration time is in
		// the future, we abort the LRU pass, which may temporarily leave the
		// cache too large. This is necessary to ensure that in cases where the
		// cache is too small, when we receive an RLS Response, we keep the
		// resulting cache entry around long enough for the pending incoming
		// requests to be re-processed through the new Picker. If we didn't do
		// this, then we'd risk throwing away each RLS response as we receive
		// it, in which case we would fail to actually route any of our
		// incoming requests.
		if entry.earliestEvictTime.After(time.Now()) {
			dc.logger.Warningf("cacheKey %+v is too recent to be evicted. Stopping cache resizing for now", key)
			break
		}

		// Stop the backoff timer before evicting the entry.
		if entry.backoffState != nil && entry.backoffState.timer != nil {
			if entry.backoffState.timer.Stop() {
				entry.backoffState.timer = nil
				backoffCancelled = true
			}
		}
		dc.deleteAndcleanup(key, entry)
	}
	dc.maxSize = size
	return backoffCancelled
}
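
// Illustrative caller pattern (a sketch; sendNewPicker is hypothetical and
// stands in for whatever the LB policy does to re-process queued RPCs):
//
//	if dc.resize(newMaxSize) {
//		// An entry with a live backoff timer was evicted; RPCs queued on
//		// that timer need a fresh picker to make progress.
//		sendNewPicker()
//	}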

// evictExpiredEntries sweeps through the cache and deletes expired entries. An
// expired entry is one for which both the `expiryTime` and `backoffExpiryTime`
// fields are in the past.
//
// The return value indicates if any expired entries were evicted.
//
// The LB policy invokes this method periodically to purge expired entries.
func (dc *dataCache) evictExpiredEntries() (evicted bool) {
	if dc.shutdown.HasFired() {
		return false
	}

	evicted = false
	dc.keys.iterateAndRun(func(key cacheKey) {
		entry, ok := dc.entries[key]
		if !ok {
			// This should never happen.
			dc.logger.Errorf("cacheKey %+v not found in the cache while attempting to perform periodic cleanup of expired entries", key)
			return
		}

		// Only evict entries for which both the data expiration time and
		// backoff expiration time fields are in the past.
		now := time.Now()
		if entry.expiryTime.After(now) || entry.backoffExpiryTime.After(now) {
			return
		}
		evicted = true
		dc.deleteAndcleanup(key, entry)
	})
	return evicted
}
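
// Illustrative periodic sweep (a sketch; purgeInterval and sendNewPicker are
// hypothetical stand-ins for the LB policy's own timer and picker update):
//
//	for range time.Tick(purgeInterval) {
//		if dc.evictExpiredEntries() {
//			sendNewPicker()
//		}
//	}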

// resetBackoffState sweeps through the cache and, for every entry with a
// backoff state, cancels the backoff timer and resets the backoff state. The
// return value indicates if any entries were mutated in this fashion.
//
// The LB policy invokes this method when the control channel moves from READY
// to TRANSIENT_FAILURE and back to READY. See `monitorConnectivityState`
// method on the `controlChannel` type for more details.
func (dc *dataCache) resetBackoffState(newBackoffState *backoffState) (backoffReset bool) {
	if dc.shutdown.HasFired() {
		return false
	}

	backoffReset = false
	dc.keys.iterateAndRun(func(key cacheKey) {
		entry, ok := dc.entries[key]
		if !ok {
			// This should never happen.
			dc.logger.Errorf("cacheKey %+v not found in the cache while attempting to reset backoff state", key)
			return
		}

		if entry.backoffState == nil {
			return
		}
		if entry.backoffState.timer != nil {
			entry.backoffState.timer.Stop()
			entry.backoffState.timer = nil
		}
		entry.backoffState = &backoffState{bs: newBackoffState.bs}
		entry.backoffTime = time.Time{}
		entry.backoffExpiryTime = time.Time{}
		backoffReset = true
	})
	return backoffReset
}

// addEntry adds a cache entry for the given key.
//
// Return value backoffCancelled indicates if a cache entry with a valid backoff
// timer was evicted to make space for the current entry. This is important to
// the RLS LB policy which would send a new picker on the channel to re-process
// any RPCs queued as a result of this backoff timer.
//
// Return value ok indicates if the entry was successfully added to the cache.
func (dc *dataCache) addEntry(key cacheKey, entry *cacheEntry) (backoffCancelled bool, ok bool) {
	if dc.shutdown.HasFired() {
		return false, false
	}

	// Handle the extremely unlikely case that a single entry is bigger than the
	// size of the cache.
	if entry.size > dc.maxSize {
		return false, false
	}
	dc.entries[key] = entry
	dc.currentSize += entry.size
	dc.keys.addEntry(key)
	// If the new entry makes the cache go over its configured size, remove some
	// old entries.
	if dc.currentSize > dc.maxSize {
		backoffCancelled = dc.resize(dc.maxSize)
	}
	return backoffCancelled, true
}

// updateEntrySize updates the size of a cache entry and the current size of the
// data cache. An entry's size can change upon receipt of an RLS response.
func (dc *dataCache) updateEntrySize(entry *cacheEntry, newSize int64) {
	dc.currentSize -= entry.size
	entry.size = newSize
	dc.currentSize += entry.size
}

func (dc *dataCache) getEntry(key cacheKey) *cacheEntry {
	if dc.shutdown.HasFired() {
		return nil
	}

	entry, ok := dc.entries[key]
	if !ok {
		return nil
	}
	dc.keys.makeRecent(key)
	return entry
}

// removeEntryForTesting removes the entry for the given key, if present. It is
// intended for use only from tests.
func (dc *dataCache) removeEntryForTesting(key cacheKey) {
	entry, ok := dc.entries[key]
	if !ok {
		return
	}
	dc.deleteAndcleanup(key, entry)
}

// deleteAndcleanup performs actions required at the time of deleting an entry
// from the data cache.
// - the entry is removed from the map of entries
// - the current size of the data cache is updated
// - the key is removed from the LRU
// - onEvict is invoked in a separate goroutine
func (dc *dataCache) deleteAndcleanup(key cacheKey, entry *cacheEntry) {
	delete(dc.entries, key)
	dc.currentSize -= entry.size
	dc.keys.removeEntry(key)
	if entry.onEvict != nil {
		go entry.onEvict()
	}
}

// stop evicts all entries from the data cache and fires the shutdown event;
// methods which check this event return early afterwards.
func (dc *dataCache) stop() {
	dc.keys.iterateAndRun(func(key cacheKey) {
		entry, ok := dc.entries[key]
		if !ok {
			// This should never happen.
			dc.logger.Errorf("cacheKey %+v not found in the cache while shutting down", key)
			return
		}
		dc.deleteAndcleanup(key, entry)
	})
	dc.shutdown.Fire()
}