istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/model/typed_xds_cache.go

// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"fmt"
	"sync"
	"time"

	discovery "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
	"github.com/google/go-cmp/cmp"
	"github.com/hashicorp/golang-lru/v2/simplelru"
	"google.golang.org/protobuf/testing/protocmp"

	"istio.io/istio/pilot/pkg/features"
	"istio.io/istio/pkg/monitoring"
	"istio.io/istio/pkg/slices"
	"istio.io/istio/pkg/util/sets"
)

var enableStats = func() bool {
	return features.EnableXDSCacheMetrics
}

var (
	xdsCacheReads = monitoring.NewSum(
		"xds_cache_reads",
		"Total number of xds cache reads.",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheEvictions = monitoring.NewSum(
		"xds_cache_evictions",
		"Total number of xds cache evictions.",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheSize = monitoring.NewGauge(
		"xds_cache_size",
		"Current size of xds cache.",
		monitoring.WithEnabled(enableStats),
	)

	dependentConfigSize = monitoring.NewGauge(
		"xds_cache_dependent_config_size",
		"Current size of dependent configs.",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheHits             = xdsCacheReads.With(typeTag.Value("hit"))
	xdsCacheMisses           = xdsCacheReads.With(typeTag.Value("miss"))
	xdsCacheEvictionsOnClear = xdsCacheEvictions.With(typeTag.Value("clear"))
	xdsCacheEvictionsOnSize  = xdsCacheEvictions.With(typeTag.Value("size"))
)

func hit() {
	xdsCacheHits.Increment()
}

func miss() {
	xdsCacheMisses.Increment()
}

func size(cs int) {
	xdsCacheSize.Record(float64(cs))
}

// CacheToken identifies the push a cached value was generated for. It is
// derived from the push start time, so newer pushes carry larger tokens.
type CacheToken uint64

// dependents exposes the configs a cache entry depends on; the entry is
// invalidated when any of these configs change.
type dependents interface {
	DependentConfigs() []ConfigHash
}

// typedXdsCache defines a store for caching XDS responses.
// All operations are thread-safe.
type typedXdsCache[K comparable] interface {
	// Flush clears the evicted indexes.
	Flush()
	// Add adds the given key with the value and its dependents for the given pushContext to the cache.
	// If the cache has been updated to a newer push context, the write is dropped silently.
	// This ensures stale data does not overwrite fresh data when dealing with concurrent
	// writers.
	Add(key K, entry dependents, pushRequest *PushRequest, value *discovery.Resource)
	// Get retrieves the cached value if it exists.
	Get(key K) *discovery.Resource
	// Clear removes the cache entries that are dependent on the configs passed.
	Clear(sets.Set[ConfigKey])
	// ClearAll clears the entire cache.
	ClearAll()
	// Keys returns all currently configured keys. This is for testing/debugging only.
	Keys() []K
	// Snapshot returns a snapshot of all keys and values. This is for testing/debugging only.
	Snapshot() []*discovery.Resource
}
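
// Illustrative usage sketch (editorial, not part of the upstream file): how a
// caller might exercise the interface above. staticDependents and
// exampleTypedXdsCacheUsage are hypothetical helpers; real callers pass XDS
// cache entries whose dependent configs are computed from the push context.

// staticDependents is a hypothetical dependents implementation with a fixed
// config list.
type staticDependents struct {
	configs []ConfigHash
}

func (s staticDependents) DependentConfigs() []ConfigHash { return s.configs }

func exampleTypedXdsCacheUsage() {
	c := newTypedXdsCache[uint64]()
	dep := ConfigKey{Name: "vs", Namespace: "default"} // Kind left at its zero value for brevity
	entry := staticDependents{configs: []ConfigHash{dep.HashCode()}}

	// Writes are tagged with the push start time; see Add below.
	c.Add(7, entry, &PushRequest{Start: time.Now()}, &discovery.Resource{Name: "route"})
	_ = c.Get(7) // hit: returns the cached resource

	// Invalidating a dependent config removes the entry.
	c.Clear(sets.New(dep))
	_ = c.Get(7) // miss: returns nil
}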

// newTypedXdsCache returns an instance of a cache.
func newTypedXdsCache[K comparable]() typedXdsCache[K] {
	cache := &lruCache[K]{
		enableAssertions: features.EnableUnsafeAssertions,
		configIndex:      map[ConfigHash]sets.Set[K]{},
		evictQueue:       make([]evictKeyConfigs[K], 0, 1000),
	}
	cache.store = newLru(cache.onEvict)
	return cache
}

type evictKeyConfigs[K comparable] struct {
	key              K
	dependentConfigs []ConfigHash
}

type lruCache[K comparable] struct {
	enableAssertions bool
	store            simplelru.LRUCache[K, cacheValue]
	// token stores the latest token of the store, used to prevent stale data from overwriting fresh data.
	// It is refreshed when Clear or ClearAll is called.
	token       CacheToken
	mu          sync.RWMutex
	configIndex map[ConfigHash]sets.Set[K]

	evictQueue []evictKeyConfigs[K]

	// evictedOnClear marks whether the current eviction was triggered by a Clear call,
	// so that onEvict can attribute the eviction to the right metric.
	evictedOnClear bool
}

var _ typedXdsCache[uint64] = &lruCache[uint64]{}

func newLru[K comparable](evictCallback simplelru.EvictCallback[K, cacheValue]) simplelru.LRUCache[K, cacheValue] {
	sz := features.XDSCacheMaxSize
	if sz <= 0 {
		sz = 20000
	}
	l, err := simplelru.NewLRU(sz, evictCallback)
	if err != nil {
		panic(fmt.Errorf("invalid lru configuration: %v", err))
	}
	return l
}

func (l *lruCache[K]) Flush() {
	l.mu.Lock()
	for _, keyConfigs := range l.evictQueue {
		l.clearConfigIndex(keyConfigs.key, keyConfigs.dependentConfigs)
	}
	// Zero the elements of the underlying array so the evicted values can be
	// garbage collected, then reset the queue, capping its capacity at 1000.
	clear(l.evictQueue)
	l.evictQueue = l.evictQueue[:0:1000]

	l.recordDependentConfigSize()
	l.mu.Unlock()
}

func (l *lruCache[K]) recordDependentConfigSize() {
	if !enableStats() {
		return
	}
	dsize := 0
	for _, dependents := range l.configIndex {
		dsize += len(dependents)
	}
	dependentConfigSize.Record(float64(dsize))
}

// onEvict is the callback passed to the LRU; it is called whenever a key is removed.
func (l *lruCache[K]) onEvict(k K, v cacheValue) {
	if l.evictedOnClear {
		xdsCacheEvictionsOnClear.Increment()
	} else {
		xdsCacheEvictionsOnSize.Increment()
	}

	// Queue the key for asynchronous index cleanup; Flush processes the queue.
	l.evictQueue = append(l.evictQueue, evictKeyConfigs[K]{k, v.dependentConfigs})
}
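
// Illustrative sketch (hypothetical, not part of the upstream file): index
// cleanup is deferred. When an entry with several dependent configs is evicted
// through Clear, only the cleared config is removed from configIndex
// immediately; the remaining index entries for that key are queued by onEvict
// and removed on the next Flush. Reuses the staticDependents helper from the
// earlier sketch.
func exampleDeferredIndexCleanup() {
	c := newTypedXdsCache[uint64]().(*lruCache[uint64])
	a := ConfigKey{Name: "a", Namespace: "default"}
	b := ConfigKey{Name: "b", Namespace: "default"}
	entry := staticDependents{configs: []ConfigHash{a.HashCode(), b.HashCode()}}
	c.Add(1, entry, &PushRequest{Start: time.Now()}, &discovery.Resource{Name: "r"})

	c.Clear(sets.New(a)) // evicts key 1; b's index entry is only queued for cleanup
	_ = c.indexLength()  // still 1: the entry under b survives until Flush
	c.Flush()
	_ = c.indexLength() // now 0: the queued cleanup ran
}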

func (l *lruCache[K]) updateConfigIndex(k K, dependentConfigs []ConfigHash) {
	for _, cfg := range dependentConfigs {
		sets.InsertOrNew(l.configIndex, cfg, k)
	}
}

func (l *lruCache[K]) clearConfigIndex(k K, dependentConfigs []ConfigHash) {
	c, exists := l.store.Get(k)
	if exists {
		newDependents := c.dependentConfigs
		// We only need to clear the configs in the old set but not the new one (old minus new).
		dependents := sets.New(dependentConfigs...).DifferenceInPlace(sets.New(newDependents...))
		for cfg := range dependents {
			sets.DeleteCleanupLast(l.configIndex, cfg, k)
		}
		return
	}
	for _, cfg := range dependentConfigs {
		sets.DeleteCleanupLast(l.configIndex, cfg, k)
	}
}

// assertUnchanged checks that a cache entry has not changed. This helps catch bad cache invalidation.
// We should never have a case where we overwrite an existing item with a new change. Instead, when
// config sources change, Clear/ClearAll should be called. At that point, we may get multiple writes
// because multiple writers may get cache misses concurrently, but they ought to generate identical
// configuration. This also checks that our XDS config generation is deterministic, which is a very
// important property.
func (l *lruCache[K]) assertUnchanged(key K, existing *discovery.Resource, replacement *discovery.Resource) {
	if l.enableAssertions {
		if existing == nil {
			// This is a new addition, not an update.
			return
		}
		// Record the time so that we can correlate when the error actually happened, since the
		// async reporting may be delayed.
		t0 := time.Now()
		// This operation is really slow, which makes tests fail for unrelated reasons, so we process it async.
		go func() {
			if !cmp.Equal(existing, replacement, protocmp.Transform()) {
				warning := fmt.Errorf("assertion failed at %v, cache entry changed but not cleared for key %v: %v\n%v\n%v",
					t0, key, cmp.Diff(existing, replacement, protocmp.Transform()), existing, replacement)
				panic(warning)
			}
		}()
	}
}

func (l *lruCache[K]) Add(k K, entry dependents, pushReq *PushRequest, value *discovery.Resource) {
	if pushReq == nil || pushReq.Start.Equal(time.Time{}) {
		return
	}
	// UnixNano will not overflow until the year 2262.
	token := CacheToken(pushReq.Start.UnixNano())
	l.mu.Lock()
	defer l.mu.Unlock()
	if token < l.token {
		// The entry may be stale, so we must drop it. This can happen when the cache is
		// invalidated by Clear or ClearAll after this push started.
		return
	}
	cur, f := l.store.Get(k)
	if f {
		// The write is stale or carries the same resource; drop it.
		if token <= cur.token {
			return
		}
		if l.enableAssertions {
			l.assertUnchanged(k, cur.value, value)
		}
	}

	dependentConfigs := entry.DependentConfigs()
	toWrite := cacheValue{value: value, token: token, dependentConfigs: dependentConfigs}
	l.store.Add(k, toWrite)
	l.token = token
	l.updateConfigIndex(k, dependentConfigs)

	// Queue the old entry's dependent configs for cleanup,
	// to prevent leaking in the index maps.
	if f {
		l.evictQueue = append(l.evictQueue, evictKeyConfigs[K]{k, cur.dependentConfigs})
	}
	size(l.store.Len())
}
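
// Illustrative sketch (hypothetical, not part of the upstream file): a write
// from a push that started before the latest Clear carries an older token and
// is silently dropped, so stale generation results never overwrite fresh
// invalidations. Assumes the clock advances between the push start and the
// Clear call, and reuses the staticDependents helper from the earlier sketch.
func exampleStaleWriteDropped() {
	c := newTypedXdsCache[uint64]()
	dep := ConfigKey{Name: "cfg", Namespace: "default"}
	entry := staticDependents{configs: []ConfigHash{dep.HashCode()}}

	staleReq := &PushRequest{Start: time.Now()} // a push starts...
	c.Clear(sets.New(dep))                      // ...then the config changes, bumping the token

	c.Add(1, entry, staleReq, &discovery.Resource{Name: "stale"})
	_ = c.Get(1) // nil: the stale write was dropped; a fresh push must repopulate
}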

// cacheValue holds a cached resource together with the token of the push that
// generated it and the configs it depends on.
type cacheValue struct {
	value            *discovery.Resource
	token            CacheToken
	dependentConfigs []ConfigHash
}

func (l *lruCache[K]) Get(key K) *discovery.Resource {
	return l.get(key, 0)
}

// get returns the cached value if it exists and is at least as fresh as the given token.
func (l *lruCache[K]) get(key K, token CacheToken) *discovery.Resource {
	l.mu.Lock()
	defer l.mu.Unlock()
	cv, ok := l.store.Get(key)
	if !ok || cv.value == nil {
		miss()
		return nil
	}
	if cv.token >= token {
		hit()
		return cv.value
	}
	miss()
	return nil
}
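
// Illustrative sketch (hypothetical, not part of the upstream file): the token
// parameter lets a reader demand a value at least as fresh as a given push. A
// cached value generated before the requested token is treated as a miss even
// though it is present in the store. Reuses the staticDependents helper from
// the earlier sketch.
func exampleTokenAwareGet() {
	l := newTypedXdsCache[uint64]().(*lruCache[uint64])
	entry := staticDependents{configs: nil}
	start := time.Now()
	l.Add(1, entry, &PushRequest{Start: start}, &discovery.Resource{Name: "r"})

	_ = l.get(1, CacheToken(start.UnixNano()))   // hit: cached token == requested token
	_ = l.get(1, CacheToken(start.UnixNano()+1)) // miss: cached value is older than requested
}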

func (l *lruCache[K]) Clear(configs sets.Set[ConfigKey]) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.token = CacheToken(time.Now().UnixNano())
	l.evictedOnClear = true
	defer func() {
		l.evictedOnClear = false
	}()
	for ckey := range configs {
		hc := ckey.HashCode()
		referenced := l.configIndex[hc]
		delete(l.configIndex, hc)
		for key := range referenced {
			l.store.Remove(key)
		}
	}
	size(l.store.Len())
}

func (l *lruCache[K]) ClearAll() {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.token = CacheToken(time.Now().UnixNano())
	// Purging with an evict callback would turn out to be pretty slow, since
	// it runs the callback for every key in the store; it is better to just
	// create a new store.
	l.store = newLru(l.onEvict)
	l.configIndex = map[ConfigHash]sets.Set[K]{}

	// Zero the elements of the underlying array so the evicted values can be
	// garbage collected, then reset the queue, capping its capacity at 1000.
	clear(l.evictQueue)
	l.evictQueue = l.evictQueue[:0:1000]

	size(l.store.Len())
}

func (l *lruCache[K]) Keys() []K {
	l.mu.RLock()
	defer l.mu.RUnlock()
	return slices.Clone(l.store.Keys())
}

func (l *lruCache[K]) Snapshot() []*discovery.Resource {
	l.mu.RLock()
	defer l.mu.RUnlock()
	iKeys := l.store.Keys()
	res := make([]*discovery.Resource, len(iKeys))
	for i, ik := range iKeys {
		v, ok := l.store.Get(ik)
		if !ok {
			continue
		}

		res[i] = v.value
	}
	return res
}

func (l *lruCache[K]) indexLength() int {
	l.mu.RLock()
	defer l.mu.RUnlock()
	return len(l.configIndex)
}

func (l *lruCache[K]) configIndexSnapshot() map[ConfigHash]sets.Set[K] {
	l.mu.RLock()
	defer l.mu.RUnlock()
	res := make(map[ConfigHash]sets.Set[K], len(l.configIndex))
	for k, v := range l.configIndex {
		res[k] = v
	}
	return res
}

// disabledCache is a no-op cache implementation that is always empty.
type disabledCache[K comparable] struct{}

var _ typedXdsCache[uint64] = &disabledCache[uint64]{}

func (d disabledCache[K]) Flush() {
}

func (d disabledCache[K]) Add(k K, entry dependents, pushReq *PushRequest, value *discovery.Resource) {
}

func (d disabledCache[K]) Get(k K) *discovery.Resource {
	return nil
}

func (d disabledCache[K]) Clear(configsUpdated sets.Set[ConfigKey]) {}

func (d disabledCache[K]) ClearAll() {}

func (d disabledCache[K]) Keys() []K { return nil }

func (d disabledCache[K]) Snapshot() []*discovery.Resource { return nil }