google.golang.org/grpc@v1.72.2/balancer/rls/cache.go

/*
 *
 * Copyright 2021 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package rls

import (
	"container/list"
	"time"

	"github.com/google/uuid"
	estats "google.golang.org/grpc/experimental/stats"
	"google.golang.org/grpc/internal/backoff"
	internalgrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
)

// cacheKey represents the key used to uniquely identify an entry in the data
// cache and in the pending requests map.
type cacheKey struct {
	// path is the full path of the incoming RPC request.
	path string
	// keys is a stringified version of the RLS request key map built using the
	// RLS keyBuilder. Since maps are not comparable in Go, a map cannot be
	// part of the key for another map (entries in the data cache and pending
	// requests map are stored in maps).
	keys string
}

// cacheEntry wraps all the data to be stored in a data cache entry.
type cacheEntry struct {
	// childPolicyWrappers contains the list of child policy wrappers
	// corresponding to the targets returned by the RLS server for this entry.
	childPolicyWrappers []*childPolicyWrapper
	// headerData is received in the RLS response and is to be sent in the
	// X-Google-RLS-Data header for matching RPCs.
	headerData string
	// expiryTime is the absolute time at which this cache entry stops being
	// valid. When an RLS request succeeds, this is set to the current time
	// plus the max_age field from the LB policy config.
	expiryTime time.Time
	// staleTime is the absolute time after which this cache entry will be
	// proactively refreshed if an incoming RPC matches this entry. When an RLS
	// request succeeds, this is set to the current time plus the stale_age
	// from the LB policy config.
	staleTime time.Time
	// earliestEvictTime is the absolute time before which this entry should
	// not be evicted from the cache. When a cache entry is created, this is
	// set to the current time plus a default value of 5 seconds. This is
	// required to make sure that a new entry added to the cache is not evicted
	// before the RLS response arrives (usually when the cache is too small).
	earliestEvictTime time.Time

	// status stores the RPC status of the previous RLS request for this
	// entry. Picks for entries with a non-nil value for this field are failed
	// with the error stored here.
	status error
	// backoffState contains all backoff related state. When an RLS request
	// succeeds, backoffState is reset. This state moves between the data cache
	// and the pending requests map.
	backoffState *backoffState
	// backoffTime is the absolute time at which the backoff period for this
	// entry ends. When an RLS request fails, this is set to the current time
	// plus the backoff value returned by the backoffState. The backoff timer
	// is also set up with this value. No new RLS requests are sent out for
	// this entry until the backoff period ends.
	//
	// Set to zero time instant upon a successful RLS response.
	backoffTime time.Time
	// backoffExpiryTime is the absolute time at which an entry which has gone
	// through backoff stops being valid. When an RLS request fails, this is
	// set to the current time plus twice the backoff time. The cache expiry
	// timer will only delete entries for which both expiryTime and
	// backoffExpiryTime are in the past.
	//
	// Set to zero time instant upon a successful RLS response.
	backoffExpiryTime time.Time

	// size stores the size of this cache entry. Used to enforce the cache size
	// specified in the LB policy configuration.
	size int64
}
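// The sketch below is illustrative and not part of the original file: it shows
// how the timestamps above might be derived when an RLS response arrives,
// assuming hypothetical maxAge/staleAge values taken from the LB policy
// config.
func exampleEntryTimestamps(maxAge, staleAge time.Duration) *cacheEntry {
	now := time.Now()
	return &cacheEntry{
		// The entry stops being valid maxAge from now.
		expiryTime: now.Add(maxAge),
		// Matching RPCs trigger a proactive refresh after staleAge.
		staleTime: now.Add(staleAge),
		// Guard a fresh entry against immediate eviction for 5 seconds.
		earliestEvictTime: now.Add(5 * time.Second),
	}
}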
// backoffState wraps all backoff related state associated with a cache entry.
type backoffState struct {
	// retries keeps track of the number of RLS failures, to be able to
	// determine the amount of time to backoff before the next attempt.
	retries int
	// bs is the exponential backoff implementation which returns the amount of
	// time to backoff, given the number of retries.
	bs backoff.Strategy
	// timer fires when the backoff period ends; incoming requests after that
	// point will trigger a new RLS request.
	timer *time.Timer
}

// lru is a cache implementation with a least recently used eviction policy.
// Internally it uses a doubly linked list, with the least recently used
// element at the front of the list and the most recently used element at the
// back of the list. The value stored in this cache will be of type `cacheKey`.
//
// It is not safe for concurrent access.
type lru struct {
	ll *list.List

	// A map from the value stored in the lru to its underlying list element is
	// maintained to have a clean API. Without this, a subset of the lru's API
	// would accept/return cacheKey while another subset would accept/return
	// list elements.
	m map[cacheKey]*list.Element
}

// newLRU creates a new cache with a least recently used eviction policy.
func newLRU() *lru {
	return &lru{
		ll: list.New(),
		m:  make(map[cacheKey]*list.Element),
	}
}

func (l *lru) addEntry(key cacheKey) {
	e := l.ll.PushBack(key)
	l.m[key] = e
}

func (l *lru) makeRecent(key cacheKey) {
	e := l.m[key]
	l.ll.MoveToBack(e)
}

func (l *lru) removeEntry(key cacheKey) {
	e := l.m[key]
	l.ll.Remove(e)
	delete(l.m, key)
}

func (l *lru) getLeastRecentlyUsed() cacheKey {
	e := l.ll.Front()
	if e == nil {
		return cacheKey{}
	}
	return e.Value.(cacheKey)
}
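// The sketch below is illustrative and not part of the original file: it walks
// through typical lru usage as exercised by the data cache that follows. The
// key values are made up.
func exampleLRUUsage() {
	l := newLRU()
	k := cacheKey{path: "/pkg.Service/Method", keys: "k1=v1"}
	l.addEntry(k)   // k is now the most recently used entry
	l.makeRecent(k) // a cache hit moves k to the back of the list
	if front := l.getLeastRecentlyUsed(); front == k {
		// Eviction candidates are taken from the front of the list.
		l.removeEntry(front)
	}
}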
// dataCache contains a cache of RLS data used by the LB policy to make routing
// decisions.
//
// The dataCache will be keyed by the request's path and keys, represented by
// the `cacheKey` type. It will maintain the cache keys in an `lru` and the
// cache data, represented by the `cacheEntry` type, in a native map.
//
// It is not safe for concurrent access.
type dataCache struct {
	maxSize         int64 // Maximum allowed size.
	currentSize     int64 // Current size.
	keys            *lru  // Cache keys maintained in lru order.
	entries         map[cacheKey]*cacheEntry
	logger          *internalgrpclog.PrefixLogger
	shutdown        *grpcsync.Event
	rlsServerTarget string

	// Read only after initialization.
	grpcTarget      string
	uuid            string
	metricsRecorder estats.MetricsRecorder
}

func newDataCache(size int64, logger *internalgrpclog.PrefixLogger, metricsRecorder estats.MetricsRecorder, grpcTarget string) *dataCache {
	return &dataCache{
		maxSize:         size,
		keys:            newLRU(),
		entries:         make(map[cacheKey]*cacheEntry),
		logger:          logger,
		shutdown:        grpcsync.NewEvent(),
		grpcTarget:      grpcTarget,
		uuid:            uuid.New().String(),
		metricsRecorder: metricsRecorder,
	}
}

// updateRLSServerTarget updates the RLS server target the RLS balancer is
// configured with.
func (dc *dataCache) updateRLSServerTarget(rlsServerTarget string) {
	dc.rlsServerTarget = rlsServerTarget
}

// resize changes the maximum allowed size of the data cache.
//
// The return value indicates if an entry with a valid backoff timer was
// evicted. This is important to the RLS LB policy, which would send a new
// picker on the channel to re-process any RPCs queued as a result of this
// backoff timer.
func (dc *dataCache) resize(size int64) (backoffCancelled bool) {
	if dc.shutdown.HasFired() {
		return false
	}

	backoffCancelled = false
	for dc.currentSize > size {
		key := dc.keys.getLeastRecentlyUsed()
		entry, ok := dc.entries[key]
		if !ok {
			// This should never happen.
			dc.logger.Errorf("cacheKey %+v not found in the cache while attempting to resize it", key)
			break
		}

		// When we encounter a cache entry whose minimum expiration time is in
		// the future, we abort the LRU pass, which may temporarily leave the
		// cache too large. This is necessary to ensure that in cases where the
		// cache is too small, when we receive an RLS response, we keep the
		// resulting cache entry around long enough for the pending incoming
		// requests to be re-processed through the new picker. If we didn't do
		// this, then we'd risk throwing away each RLS response as we receive
		// it, in which case we would fail to actually route any of our
		// incoming requests.
		if entry.earliestEvictTime.After(time.Now()) {
			dc.logger.Warningf("cacheKey %+v is too recent to be evicted. Stopping cache resizing for now", key)
			break
		}

		// Stop the backoff timer before evicting the entry.
		if entry.backoffState != nil && entry.backoffState.timer != nil {
			if entry.backoffState.timer.Stop() {
				entry.backoffState.timer = nil
				backoffCancelled = true
			}
		}
		dc.deleteAndCleanup(key, entry)
	}
	dc.maxSize = size
	return backoffCancelled
}
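// The sketch below is illustrative and not part of the original file: it shows
// how a caller might react to resize's return value. sendNewPicker is a
// hypothetical stand-in for the LB policy's picker update path.
func exampleResize(dc *dataCache, newSize int64, sendNewPicker func()) {
	if backoffCancelled := dc.resize(newSize); backoffCancelled {
		// RPCs queued behind the cancelled backoff timer need to be
		// re-processed through a fresh picker.
		sendNewPicker()
	}
}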
// evictExpiredEntries sweeps through the cache and deletes expired entries. An
// expired entry is one for which both the `expiryTime` and `backoffExpiryTime`
// fields are in the past.
//
// The return value indicates if any expired entries were evicted.
//
// The LB policy invokes this method periodically to purge expired entries.
func (dc *dataCache) evictExpiredEntries() bool {
	if dc.shutdown.HasFired() {
		return false
	}

	evicted := false
	for key, entry := range dc.entries {
		// Only evict entries for which both the data expiration time and
		// backoff expiration time fields are in the past.
		now := time.Now()
		if entry.expiryTime.After(now) || entry.backoffExpiryTime.After(now) {
			continue
		}
		dc.deleteAndCleanup(key, entry)
		evicted = true
	}
	return evicted
}
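// The sketch below is illustrative and not part of the original file: it shows
// the shape of the periodic purge described above. The interval is
// hypothetical, and since dataCache is not safe for concurrent access, a real
// caller would have to serialize this with all other cache access.
func examplePurgeLoop(dc *dataCache, interval time.Duration, done chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			dc.evictExpiredEntries()
		case <-done:
			return
		}
	}
}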
// resetBackoffState sweeps through the cache; for entries with a backoff
// state, the backoff timer is cancelled and the backoff state is reset. The
// return value indicates if any entries were mutated in this fashion.
//
// The LB policy invokes this method when the control channel moves from READY
// to TRANSIENT_FAILURE back to READY. See `monitorConnectivityState` method on
// the `controlChannel` type for more details.
func (dc *dataCache) resetBackoffState(newBackoffState *backoffState) bool {
	if dc.shutdown.HasFired() {
		return false
	}

	backoffReset := false
	for _, entry := range dc.entries {
		if entry.backoffState == nil {
			continue
		}
		if entry.backoffState.timer != nil {
			entry.backoffState.timer.Stop()
			entry.backoffState.timer = nil
		}
		entry.backoffState = &backoffState{bs: newBackoffState.bs}
		entry.backoffTime = time.Time{}
		entry.backoffExpiryTime = time.Time{}
		backoffReset = true
	}
	return backoffReset
}

// addEntry adds a cache entry for the given key.
//
// Return value backoffCancelled indicates if a cache entry with a valid
// backoff timer was evicted to make space for the current entry. This is
// important to the RLS LB policy, which would send a new picker on the channel
// to re-process any RPCs queued as a result of this backoff timer.
//
// Return value ok indicates if the entry was successfully added to the cache.
func (dc *dataCache) addEntry(key cacheKey, entry *cacheEntry) (backoffCancelled bool, ok bool) {
	if dc.shutdown.HasFired() {
		return false, false
	}

	// Handle the extremely unlikely case that a single entry is bigger than
	// the size of the cache.
	if entry.size > dc.maxSize {
		return false, false
	}
	dc.entries[key] = entry
	dc.currentSize += entry.size
	dc.keys.addEntry(key)
	// If the new entry makes the cache go over its configured size, remove
	// some old entries.
	if dc.currentSize > dc.maxSize {
		backoffCancelled = dc.resize(dc.maxSize)
	}
	cacheSizeMetric.Record(dc.metricsRecorder, dc.currentSize, dc.grpcTarget, dc.rlsServerTarget, dc.uuid)
	cacheEntriesMetric.Record(dc.metricsRecorder, int64(len(dc.entries)), dc.grpcTarget, dc.rlsServerTarget, dc.uuid)
	return backoffCancelled, true
}

// updateEntrySize updates the size of a cache entry and the current size of
// the data cache. An entry's size can change upon receipt of an RLS response.
func (dc *dataCache) updateEntrySize(entry *cacheEntry, newSize int64) {
	dc.currentSize -= entry.size
	entry.size = newSize
	dc.currentSize += entry.size
	cacheSizeMetric.Record(dc.metricsRecorder, dc.currentSize, dc.grpcTarget, dc.rlsServerTarget, dc.uuid)
}

func (dc *dataCache) getEntry(key cacheKey) *cacheEntry {
	if dc.shutdown.HasFired() {
		return nil
	}

	entry, ok := dc.entries[key]
	if !ok {
		return nil
	}
	dc.keys.makeRecent(key)
	return entry
}

func (dc *dataCache) removeEntryForTesting(key cacheKey) {
	entry, ok := dc.entries[key]
	if !ok {
		return
	}
	dc.deleteAndCleanup(key, entry)
}

// deleteAndCleanup performs actions required at the time of deleting an entry
// from the data cache.
//   - the entry is removed from the map of entries
//   - the current size of the data cache is updated
//   - the key is removed from the LRU
func (dc *dataCache) deleteAndCleanup(key cacheKey, entry *cacheEntry) {
	delete(dc.entries, key)
	dc.currentSize -= entry.size
	dc.keys.removeEntry(key)
	cacheSizeMetric.Record(dc.metricsRecorder, dc.currentSize, dc.grpcTarget, dc.rlsServerTarget, dc.uuid)
	cacheEntriesMetric.Record(dc.metricsRecorder, int64(len(dc.entries)), dc.grpcTarget, dc.rlsServerTarget, dc.uuid)
}

func (dc *dataCache) stop() {
	for key, entry := range dc.entries {
		dc.deleteAndCleanup(key, entry)
	}
	dc.shutdown.Fire()
}
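// The sketch below is illustrative and not part of the original file: it shows
// the read path a picker might follow at pick time. The actual pick logic
// lives elsewhere in this package; the name and return shape here are
// hypothetical.
func exampleLookup(dc *dataCache, key cacheKey) (targets []*childPolicyWrapper, needRefresh bool) {
	entry := dc.getEntry(key)
	if entry == nil {
		return nil, true // miss: an RLS request must be sent
	}
	now := time.Now()
	if entry.staleTime.Before(now) && entry.backoffTime.Before(now) {
		needRefresh = true // stale and not backing off: proactively refresh
	}
	return entry.childPolicyWrappers, needRefresh
}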