sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/cache/cache.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cache

import (
	"fmt"
	"sync"

	"github.com/hashicorp/golang-lru/simplelru"
	"github.com/sirupsen/logrus"
)

// Overview
//
// LRUCache uses an off-the-shelf LRU cache library for the low-level caching
// implementation, which uses the empty interface for keys and values. The
// values are what we store in the cache, and to retrieve them, we have to
// provide a key (which must be a hashable object). We wrap this cache with a
// single lock, and use an algorithm for a concurrent non-blocking cache to
// make it both thread-safe and also resistant to so-called cache stampedes,
// where many concurrent threads all attempt to look up the same (missing)
// key/value pair from the cache (see Alan Donovan and Brian Kernighan, "The Go
// Programming Language" (Addison-Wesley, 2016), p. 277).
//
// In practical terms, this means that if 1000 requests come in at the same
// time for the same key, only the first one will perform the value
// construction while the other 999 will wait for this first goroutine to
// finish resolving the value. This property makes this cache resilient and is
// what is meant by "non-blocking".

// LRUCache is the actual concurrent non-blocking cache.
type LRUCache struct {
	*sync.Mutex
	*simplelru.LRU
	callbacks Callbacks
}

// Callbacks stores various callbacks that may fire during the lifetime of an
// LRUCache.
//
// NOTE: You must make sure that your callbacks are able to return quickly,
// because having slow callbacks will result in degraded cache performance
// (because the cache invokes your callbacks synchronously). The reason why we
// do this synchronously (and not invoke callbacks in a separate goroutine
// ourselves) is because we want to give users the flexibility to do that
// themselves. Hard-coding in a `go ...` invocation in our callback call sites
// would risk unnecessarily costing performance if the callbacks themselves
// are already optimized to return quickly.
type Callbacks struct {
	LookupsCallback         EventCallback
	HitsCallback            EventCallback
	MissesCallback          EventCallback
	ForcedEvictionsCallback simplelru.EvictCallback
	ManualEvictionsCallback EventCallback
}

// EventCallback is similar to simplelru.EvictCallback, except that it doesn't
// take a value argument.
type EventCallback func(key interface{})

// ValConstructor is used to construct a value. The assumption is that this
// ValConstructor is expensive to compute, and that we need to memoize it via
// the LRUCache. The raw values of a cache are only constructed after a cache
// miss (and only the first cache miss). Using this type allows us to use any
// arbitrary function whose resulting value needs to be memoized (saved in the
// cache). This type also allows us to delay running the expensive computation
// until we actually need it (after a cache miss).
type ValConstructor func() (interface{}, error)

// Promise is a wrapper around cache value construction; it is used to
// synchronize the to-be-cached value between the first thread that undergoes
// a cache miss and subsequent threads that attempt to look up the same cache
// entry (cache hit). When the Promise is resolved (when the
// "valConstructionPending" channel is closed), the value is ready for
// concurrent reads.
type Promise struct {
	valConstructor         ValConstructor
	valConstructionPending chan struct{}
	val                    interface{}
	err                    error
}

func newPromise(valConstructor ValConstructor) *Promise {
	return &Promise{
		valConstructor:         valConstructor,
		valConstructionPending: make(chan struct{}),
	}
}

// waitForResolution blocks the current thread until the first thread that
// detected a cache miss has finished constructing the value (see resolve()).
func (p *Promise) waitForResolution() {
	<-p.valConstructionPending
}

// resolve resolves the Promise by constructing the value and closing the
// valConstructionPending channel, thereby unblocking any other thread that
// has been waiting for the value to be constructed.
func (p *Promise) resolve() {
	p.val, p.err = p.valConstructor()
	close(p.valConstructionPending)
}

// NewLRUCache returns a new LRUCache with a given size (number of elements).
// The callbacks.ForcedEvictionsCallback, if set, is called whenever the
// underlying cache forcibly evicts an entry.
func NewLRUCache(size int,
	callbacks Callbacks) (*LRUCache, error) {
	cache, err := simplelru.NewLRU(size, callbacks.ForcedEvictionsCallback)
	if err != nil {
		return nil, err
	}

	return &LRUCache{
		&sync.Mutex{},
		cache,
		callbacks,
	}, nil
}

// GetOrAdd tries to use a cache if it is available to get a Value. It is
// assumed that Value is expensive to construct from scratch, which is the
// reason why we try to use the cache in the first place. If we do end up
// constructing a Value from scratch, we store it into the cache with a
// corresponding key, so that we can look up the Value with just the key in
// the future.
//
// This cache is resistant to cache stampedes because it uses a duplicate
// suppression strategy. This is also called request coalescing.
func (lruCache *LRUCache) GetOrAdd(
	key interface{},
	valConstructor ValConstructor) (interface{}, bool, error) {

	// Cache lookup.
	if lruCache.callbacks.LookupsCallback != nil {
		lruCache.callbacks.LookupsCallback(key)
	}
	lruCache.Lock()
	var promise *Promise
	var ok bool
	maybePromise, promisePending := lruCache.Get(key)

	if promisePending {
		// A promise exists, BUT the wrapped value inside it (p.val) might not
		// be written to yet by the thread that is actually resolving the
		// promise.
		//
		// For now we just unlock the overall lruCache itself so that it can
		// service other GetOrAdd() calls to it.
		lruCache.Unlock()

		// Record the cache "hit". To be more precise, there are actually two
		// possibilities here: either the value is already ready to be
		// consumed (a true cache hit), or the value is still being
		// constructed and we have to wait for the promise to resolve first.
		if lruCache.callbacks.HitsCallback != nil {
			lruCache.callbacks.HitsCallback(key)
		}

		// If the type is not a Promise type, there's no need to wait and we
		// can just return immediately with an error.
		promise, ok = maybePromise.(*Promise)
		if !ok {
			err := fmt.Errorf("Programmer error: expected cache entry type '*Promise', got '%T'", maybePromise)
			logrus.WithField("key", key).Error(err)
			return nil, false, err
		}

		// Block until the first thread that originally created this promise
		// has finished resolving it. Then it's safe to return the resolved
		// values of the promise below.
		//
		// If the original thread resolved the promise already a long time ago
		// (by calling resolve()), then this waitForResolution() will finish
		// immediately and we will not block at all.
		promise.waitForResolution()
	} else {
		// No promise exists for this key. In other words, we are the first
		// thread to ask for this key's value and so we have no choice but to
		// construct the value ourselves (this call is expensive!) and add it
		// to the cache.
		//
		// If there are other concurrent threads that call GetOrAdd() with the
		// same key and corresponding value constructor, we force them to use
		// the same value as us (so that they don't have to also call
		// valConstructor()). We do this with the following algorithm:
		//
		//  1. immediately create a Promise to construct the value;
		//  2. actually construct the value (expensive operation);
		//  3. resolve the promise so that all threads watching the same
		//     Promise get the value from Step 2.
		//
		// This mitigation strategy is a kind of "duplicate suppression", also
		// called "request coalescing". The problem of having to deal with a
		// flood of multiple requests for the same cache entry is also called
		// a "cache stampede".

		// Step 1
		//
		// Let other threads know about our promise to construct the value. We
		// don't care if the underlying LRU cache had to evict an existing
		// entry.
		promise = newPromise(valConstructor)
		_ = lruCache.Add(key, promise)
		// We must unlock here so that the cache does not block other
		// GetOrAdd() calls to it for different (or same) key/value pairs.
		lruCache.Unlock()

		// Record the cache miss.
		if lruCache.callbacks.MissesCallback != nil {
			lruCache.callbacks.MissesCallback(key)
		}

		// Steps 2 & 3
		//
		// Construct the value (expensive operation), and broadcast to all
		// watchers of this promise that it is ready to be read from (no data
		// race!).
		promise.resolve()

		// If the value construction (expensive operation) failed, then we
		// delete the cached entry so that we may attempt to re-try again in
		// the future (instead of waiting for the LRUCache to evict it on its
		// own over time).
		//
		// NOTE: It may be the case that the underlying lruCache itself
		// decided to evict this key by the time we try to Lock() it here and
		// evict it ourselves. I.e., it may be the case that the lruCache
		// evicted our key because there just happened to be a massive load of
		// calls with lots of different keys, forcing all old cached values to
		// be evicted.
		// But this is a minor concern because (1) it is unlikely to happen
		// and (2) even if it does happen, our eviction will be a NOP because
		// the key we want to delete wouldn't be in the cache anyway (it's
		// already been evicted!).
		//
		// Another possibility is that by the time we attempt to delete the
		// key here, there has been not only an eviction of this same key, but
		// also the creation of another entry with the same key with valid
		// results. So at worst we would be wrongfully invalidating a cache
		// entry.
		//
		// TODO: If our cache implementation supports a TTL mechanism, then we
		// could just set that instead and let the cached entry expire on its
		// own (we would not have to do this eviction ourselves manually).
		if promise.err != nil {
			logrus.WithField("key", key).Infof("promise was resolved, but value construction returned an error; deleting key from cache...")

			lruCache.Lock()
			weDeletedThisKey := lruCache.Remove(key)
			lruCache.Unlock()
			if weDeletedThisKey {
				if lruCache.callbacks.ManualEvictionsCallback != nil {
					lruCache.callbacks.ManualEvictionsCallback(key)
				}
				logrus.WithField("key", key).Infof("successfully deleted")
			} else {
				err := fmt.Errorf("unexpected (non-problematic) race: key deleted by the cache without our knowledge; our own deletion of this key was a NOP but this does not constitute a problem")
				logrus.WithField("key", key).Info(err)
			}
		}
	}

	return promise.val, ok, promise.err
}
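
// The sketch below is not part of the upstream file; it is an illustrative
// usage example (a minimal sketch relying only on the fmt and sync imports
// above) of how concurrent GetOrAdd() calls for the same key are coalesced so
// that the expensive value constructor runs only once. The key "some-key",
// the constructor body, and the cache size are assumptions made purely for
// illustration.
func exampleGetOrAddUsage() {
	cache, err := NewLRUCache(100, Callbacks{})
	if err != nil {
		panic(err)
	}

	// Count how many times the expensive constructor actually runs.
	var mu sync.Mutex
	constructions := 0
	expensive := func() (interface{}, error) {
		mu.Lock()
		constructions++
		mu.Unlock()
		return "expensive-value", nil
	}

	// 1000 goroutines ask for the same key at the same time. Only the first
	// cache miss creates and resolves the Promise; the rest wait on it and
	// reuse the resolved value.
	var wg sync.WaitGroup
	for i := 0; i < 1000; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			val, cached, err := cache.GetOrAdd("some-key", expensive)
			_, _, _ = val, cached, err
		}()
	}
	wg.Wait()

	// Expected to print 1: the construction was deduplicated across all the
	// concurrent calls (assuming the single key was never evicted).
	fmt.Printf("constructor ran %d time(s)\n", constructions)
}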