istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/model/typed_xds_cache.go

// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package model

import (
	"fmt"
	"sync"
	"time"

	discovery "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
	"github.com/google/go-cmp/cmp"
	"github.com/hashicorp/golang-lru/v2/simplelru"
	"google.golang.org/protobuf/testing/protocmp"

	"istio.io/istio/pilot/pkg/features"
	"istio.io/istio/pkg/monitoring"
	"istio.io/istio/pkg/slices"
	"istio.io/istio/pkg/util/sets"
)

var enableStats = func() bool {
	return features.EnableXDSCacheMetrics
}

var (
	xdsCacheReads = monitoring.NewSum(
		"xds_cache_reads",
		"Total number of xds cache reads.",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheEvictions = monitoring.NewSum(
		"xds_cache_evictions",
		"Total number of xds cache evictions.",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheSize = monitoring.NewGauge(
		"xds_cache_size",
		"Current size of xds cache",
		monitoring.WithEnabled(enableStats),
	)

	dependentConfigSize = monitoring.NewGauge(
		"xds_cache_dependent_config_size",
		"Current size of dependent configs",
		monitoring.WithEnabled(enableStats),
	)

	xdsCacheHits             = xdsCacheReads.With(typeTag.Value("hit"))
	xdsCacheMisses           = xdsCacheReads.With(typeTag.Value("miss"))
	xdsCacheEvictionsOnClear = xdsCacheEvictions.With(typeTag.Value("clear"))
	xdsCacheEvictionsOnSize  = xdsCacheEvictions.With(typeTag.Value("size"))
)

func hit() {
	xdsCacheHits.Increment()
}

func miss() {
	xdsCacheMisses.Increment()
}

func size(cs int) {
	xdsCacheSize.Record(float64(cs))
}

type CacheToken uint64

type dependents interface {
	DependentConfigs() []ConfigHash
}

// typedXdsCache defines a store for caching XDS responses.
// All operations are thread safe.
type typedXdsCache[K comparable] interface {
	// Flush clears the dependent config indexes of entries that have been evicted.
	Flush()
	// Add adds the given key with the value and its dependents for the given push request to the cache.
	// If the cache has been updated to a newer push context, the write will be dropped silently.
	// This ensures stale data does not overwrite fresh data when dealing with concurrent
	// writers.
	Add(key K, entry dependents, pushRequest *PushRequest, value *discovery.Resource)
	// Get retrieves the cached value if it exists.
	Get(key K) *discovery.Resource
	// Clear removes the cache entries that are dependent on the configs passed.
	Clear(sets.Set[ConfigKey])
	// ClearAll clears the entire cache.
	ClearAll()
	// Keys returns all currently configured keys. This is for testing/debug only.
	Keys() []K
	// Snapshot returns a snapshot of all keys and values. This is for testing/debug only.
	Snapshot() []*discovery.Resource
}

// newTypedXdsCache returns an instance of a cache.
func newTypedXdsCache[K comparable]() typedXdsCache[K] {
	cache := &lruCache[K]{
		enableAssertions: features.EnableUnsafeAssertions,
		configIndex:      map[ConfigHash]sets.Set[K]{},
		evictQueue:       make([]evictKeyConfigs[K], 0, 1000),
	}
	cache.store = newLru(cache.onEvict)
	return cache
}

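// Illustrative sketch (not code used elsewhere in this file): a consumer of typedXdsCache
// typically checks for a cached resource before generating one and writes the result back
// afterwards; Add silently drops the write if a newer push has already refreshed the cache
// token. The names key, entry, and buildResource below are hypothetical placeholders.
//
//	c := newTypedXdsCache[uint64]()
//	req := &PushRequest{Start: time.Now()}
//	if cached := c.Get(key); cached != nil {
//		return cached // cache hit
//	}
//	res := buildResource(entry)  // hypothetical generation step
//	c.Add(key, entry, req, res)  // no-op if req predates a later Clear/ClearAll
//	return res
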
type evictKeyConfigs[K comparable] struct {
	key              K
	dependentConfigs []ConfigHash
}

type lruCache[K comparable] struct {
	enableAssertions bool
	store            simplelru.LRUCache[K, cacheValue]
	// token stores the latest token of the store, used to prevent stale data overwrite.
	// It is refreshed when Clear or ClearAll is called.
	token       CacheToken
	mu          sync.RWMutex
	configIndex map[ConfigHash]sets.Set[K]

	evictQueue []evictKeyConfigs[K]

	// evictedOnClear marks whether evictions are currently being triggered by a Clear call
	// (rather than by LRU size pressure), so onEvict can attribute them to the right metric.
	evictedOnClear bool
}

var _ typedXdsCache[uint64] = &lruCache[uint64]{}

func newLru[K comparable](evictCallback simplelru.EvictCallback[K, cacheValue]) simplelru.LRUCache[K, cacheValue] {
	sz := features.XDSCacheMaxSize
	if sz <= 0 {
		sz = 20000
	}
	l, err := simplelru.NewLRU(sz, evictCallback)
	if err != nil {
		panic(fmt.Errorf("invalid lru configuration: %v", err))
	}
	return l
}

func (l *lruCache[K]) Flush() {
	l.mu.Lock()
	for _, keyConfigs := range l.evictQueue {
		l.clearConfigIndex(keyConfigs.key, keyConfigs.dependentConfigs)
	}
	// Zero the elements so the underlying array releases its references and they can be
	// garbage collected, then truncate the queue while keeping its original capacity.
	clear(l.evictQueue)
	l.evictQueue = l.evictQueue[:0:1000]

	l.recordDependentConfigSize()
	l.mu.Unlock()
}

func (l *lruCache[K]) recordDependentConfigSize() {
	if !enableStats() {
		return
	}
	dsize := 0
	for _, dependents := range l.configIndex {
		dsize += len(dependents)
	}
	dependentConfigSize.Record(float64(dsize))
}

// onEvict is the callback passed to the LRU; it is called whenever a key is removed.
func (l *lruCache[K]) onEvict(k K, v cacheValue) {
	if l.evictedOnClear {
		xdsCacheEvictionsOnClear.Increment()
	} else {
		xdsCacheEvictionsOnSize.Increment()
	}

	// Queue the dependent configs; the indexes are cleared asynchronously by the next Flush.
	l.evictQueue = append(l.evictQueue, evictKeyConfigs[K]{k, v.dependentConfigs})
}

func (l *lruCache[K]) updateConfigIndex(k K, dependentConfigs []ConfigHash) {
	for _, cfg := range dependentConfigs {
		sets.InsertOrNew(l.configIndex, cfg, k)
	}
}

func (l *lruCache[K]) clearConfigIndex(k K, dependentConfigs []ConfigHash) {
	c, exists := l.store.Get(k)
	if exists {
		newDependents := c.dependentConfigs
		// The key has since been re-added with new dependents; we only need to clear the
		// configs in the set difference {old - new}.
		dependents := sets.New(dependentConfigs...).DifferenceInPlace(sets.New(newDependents...))
		for cfg := range dependents {
			sets.DeleteCleanupLast(l.configIndex, cfg, k)
		}
		return
	}
	for _, cfg := range dependentConfigs {
		sets.DeleteCleanupLast(l.configIndex, cfg, k)
	}
}

// assertUnchanged checks that a cache entry is not changed. This helps catch bad cache invalidation.
// We should never have a case where we overwrite an existing item with a new change. Instead, when
// config sources change, Clear/ClearAll should be called. At this point, we may get multiple writes
// because multiple writers may get cache misses concurrently, but they ought to generate identical
// configuration. This also checks that our XDS config generation is deterministic, which is a very
// important property.
func (l *lruCache[K]) assertUnchanged(key K, existing *discovery.Resource, replacement *discovery.Resource) {
	if l.enableAssertions {
		if existing == nil {
			// This is a new addition, not an update
			return
		}
		// Record time so that we can correlate when the error actually happened, since the async reporting
		// may be delayed
		t0 := time.Now()
		// This operation is really slow, which makes tests fail for unrelated reasons, so we process it async.
		go func() {
			if !cmp.Equal(existing, replacement, protocmp.Transform()) {
				warning := fmt.Errorf("assertion failed at %v, cache entry changed but not cleared for key %v: %v\n%v\n%v",
					t0, key, cmp.Diff(existing, replacement, protocmp.Transform()), existing, replacement)
				panic(warning)
			}
		}()
	}
}

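// Illustrative timeline (hypothetical timestamps, not taken from a real push) of the token
// check in Add below: tokens are the UnixNano start times of push requests, and l.token is
// bumped to time.Now() by Clear/ClearAll, so a write computed against an old push loses the
// race and is dropped rather than overwriting fresher data.
//
//	t=100  pushA starts                     -> token 100
//	t=150  Clear(...) invalidates the cache -> l.token = 150
//	t=200  pushB starts                     -> token 200
//	       Add with token 100 (pushA): 100 < 150, dropped as stale
//	       Add with token 200 (pushB): accepted, l.token becomes 200
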
func (l *lruCache[K]) Add(k K, entry dependents, pushReq *PushRequest, value *discovery.Resource) {
	if pushReq == nil || pushReq.Start.Equal(time.Time{}) {
		return
	}
	// It will not overflow until year 2262
	token := CacheToken(pushReq.Start.UnixNano())
	l.mu.Lock()
	defer l.mu.Unlock()
	if token < l.token {
		// The entry may be stale, so we need to drop it. This can happen when the cache is
		// invalidated after we call Clear or ClearAll.
		return
	}
	cur, f := l.store.Get(k)
	if f {
		// The incoming write is stale or identical to the existing entry; keep what we have.
		if token <= cur.token {
			return
		}
		if l.enableAssertions {
			l.assertUnchanged(k, cur.value, value)
		}
	}

	dependentConfigs := entry.DependentConfigs()
	toWrite := cacheValue{value: value, token: token, dependentConfigs: dependentConfigs}
	l.store.Add(k, toWrite)
	l.token = token
	l.updateConfigIndex(k, dependentConfigs)

	// We have to make sure we evict old entries with the same key
	// to prevent leaking in the index maps.
	if f {
		l.evictQueue = append(l.evictQueue, evictKeyConfigs[K]{k, cur.dependentConfigs})
	}
	size(l.store.Len())
}

type cacheValue struct {
	value            *discovery.Resource
	token            CacheToken
	dependentConfigs []ConfigHash
}

func (l *lruCache[K]) Get(key K) *discovery.Resource {
	return l.get(key, 0)
}

// get returns the cached value if it exists and its token is not older than the given token.
func (l *lruCache[K]) get(key K, token CacheToken) *discovery.Resource {
	l.mu.Lock()
	defer l.mu.Unlock()
	cv, ok := l.store.Get(key)
	if !ok || cv.value == nil {
		miss()
		return nil
	}
	if cv.token >= token {
		hit()
		return cv.value
	}
	miss()
	return nil
}

func (l *lruCache[K]) Clear(configs sets.Set[ConfigKey]) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.token = CacheToken(time.Now().UnixNano())
	l.evictedOnClear = true
	defer func() {
		l.evictedOnClear = false
	}()
	for ckey := range configs {
		hc := ckey.HashCode()
		referenced := l.configIndex[hc]
		delete(l.configIndex, hc)
		for key := range referenced {
			l.store.Remove(key)
		}
	}
	size(l.store.Len())
}

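// For illustration (hypothetical keys k1, k2 and configs A, B): configIndex, maintained by
// updateConfigIndex/clearConfigIndex above, is a reverse index from a config hash to every
// cache key that depends on it, so Clear can find affected entries without scanning the
// whole store. If k1 depends on configs {A, B} and k2 depends on {B}, the index looks like:
//
//	configIndex == map[ConfigHash]sets.Set[K]{
//		hash(A): {k1},
//		hash(B): {k1, k2},
//	}
//
// Clear(sets.New(B)) then removes k1 and k2 from the store and deletes the hash(B) entry;
// the now-stale hash(A) -> k1 mapping is cleaned up on the next Flush via the evict queue.
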
func (l *lruCache[K]) ClearAll() {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.token = CacheToken(time.Now().UnixNano())
	// Purging with an evict callback would be pretty slow since it runs the callback for
	// every key in the store; it is cheaper to just create a new store.
	l.store = newLru(l.onEvict)
	l.configIndex = map[ConfigHash]sets.Set[K]{}

	// Zero the elements so the underlying array releases its references and they can be
	// garbage collected, then truncate the queue while keeping its original capacity.
	clear(l.evictQueue)
	l.evictQueue = l.evictQueue[:0:1000]

	size(l.store.Len())
}

func (l *lruCache[K]) Keys() []K {
	l.mu.RLock()
	defer l.mu.RUnlock()
	return slices.Clone(l.store.Keys())
}

func (l *lruCache[K]) Snapshot() []*discovery.Resource {
	l.mu.RLock()
	defer l.mu.RUnlock()
	iKeys := l.store.Keys()
	res := make([]*discovery.Resource, len(iKeys))
	for i, ik := range iKeys {
		v, ok := l.store.Get(ik)
		if !ok {
			continue
		}

		res[i] = v.value
	}
	return res
}

func (l *lruCache[K]) indexLength() int {
	l.mu.RLock()
	defer l.mu.RUnlock()
	return len(l.configIndex)
}

func (l *lruCache[K]) configIndexSnapshot() map[ConfigHash]sets.Set[K] {
	l.mu.RLock()
	defer l.mu.RUnlock()
	res := make(map[ConfigHash]sets.Set[K], len(l.configIndex))
	for k, v := range l.configIndex {
		res[k] = v
	}
	return res
}

// disabledCache is a cache that is always empty.
type disabledCache[K comparable] struct{}

var _ typedXdsCache[uint64] = &disabledCache[uint64]{}

func (d disabledCache[K]) Flush() {
}

func (d disabledCache[K]) Add(k K, entry dependents, pushReq *PushRequest, value *discovery.Resource) {
}

func (d disabledCache[K]) Get(k K) *discovery.Resource {
	return nil
}

func (d disabledCache[K]) Clear(configsUpdated sets.Set[ConfigKey]) {}

func (d disabledCache[K]) ClearAll() {}

func (d disabledCache[K]) Keys() []K { return nil }

func (d disabledCache[K]) Snapshot() []*discovery.Resource { return nil }

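// Illustrative sketch (an assumption about how callers might wire this up, not the actual
// constructor used elsewhere in Pilot): because disabledCache satisfies the same interface,
// caching can be turned off without nil checks at call sites. The newCacheFor name below is
// a hypothetical placeholder.
//
//	func newCacheFor[K comparable](enabled bool) typedXdsCache[K] {
//		if !enabled {
//			return disabledCache[K]{}
//		}
//		return newTypedXdsCache[K]()
//	}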