github.com/outbrain/consul@v1.4.5/agent/cache/watch.go (about) 1 package cache 2 3 import ( 4 "context" 5 "fmt" 6 "reflect" 7 "time" 8 9 "github.com/hashicorp/consul/lib" 10 ) 11 12 // UpdateEvent is a struct summarizing an update to a cache entry 13 type UpdateEvent struct { 14 // CorrelationID is used by the Notify API to allow correlation of updates 15 // with specific requests. We could return the full request object and 16 // cachetype for consumers to match against the calls they made but in 17 // practice it's cleaner for them to choose the minimal necessary unique 18 // identifier given the set of things they are watching. They might even 19 // choose to assign random IDs for example. 20 CorrelationID string 21 Result interface{} 22 Meta ResultMeta 23 Err error 24 } 25 26 // Notify registers a desire to be updated about changes to a cache result. 27 // 28 // It is a helper that abstracts code from performing their own "blocking" query 29 // logic against a cache key to watch for changes and to maintain the key in 30 // cache actively. It will continue to perform blocking Get requests until the 31 // context is canceled. 32 // 33 // The passed context must be canceled or timeout in order to free resources 34 // and stop maintaining the value in cache. Typically request-scoped resources 35 // do this but if a long-lived context like context.Background is used, then the 36 // caller must arrange for it to be canceled when the watch is no longer 37 // needed. 38 // 39 // The passed chan may be buffered or unbuffered, if the caller doesn't consume 40 // fast enough it will block the notification loop. When the chan is later 41 // drained, watching resumes correctly. If the pause is longer than the 42 // cachetype's TTL, the result might be removed from the local cache. Even in 43 // this case though when the chan is drained again, the new Get will re-fetch 44 // the entry from servers and resume notification behavior transparently. 45 // 46 // The chan is passed in to allow multiple cached results to be watched by a 47 // single consumer without juggling extra goroutines per watch. The 48 // correlationID is opaque and will be returned in all UpdateEvents generated by 49 // result of watching the specified request so the caller can set this to any 50 // value that allows them to disambiguate between events in the returned chan 51 // when sharing a chan between multiple cache entries. If the chan is closed, 52 // the notify loop will terminate. 53 func (c *Cache) Notify(ctx context.Context, t string, r Request, 54 correlationID string, ch chan<- UpdateEvent) error { 55 56 // Get the type that we're fetching 57 c.typesLock.RLock() 58 tEntry, ok := c.types[t] 59 c.typesLock.RUnlock() 60 if !ok { 61 return fmt.Errorf("unknown type in cache: %s", t) 62 } 63 if tEntry.Type.SupportsBlocking() { 64 go c.notifyBlockingQuery(ctx, t, r, correlationID, ch) 65 } else { 66 info := r.CacheInfo() 67 if info.MaxAge == 0 { 68 return fmt.Errorf("Cannot use Notify for polling cache types without specifying the MaxAge") 69 } 70 go c.notifyPollingQuery(ctx, t, r, correlationID, ch, info.MaxAge) 71 } 72 73 return nil 74 } 75 76 func (c *Cache) notifyBlockingQuery(ctx context.Context, t string, r Request, correlationID string, ch chan<- UpdateEvent) { 77 // Always start at 0 index to deliver the initial (possibly currently cached 78 // value). 79 index := uint64(0) 80 failures := uint(0) 81 82 for { 83 // Check context hasn't been canceled 84 if ctx.Err() != nil { 85 return 86 } 87 88 // Blocking request 89 res, meta, err := c.getWithIndex(t, r, index) 90 91 // Check context hasn't been canceled 92 if ctx.Err() != nil { 93 return 94 } 95 96 // Check the index of the value returned in the cache entry to be sure it 97 // changed 98 if index < meta.Index { 99 u := UpdateEvent{correlationID, res, meta, err} 100 select { 101 case ch <- u: 102 case <-ctx.Done(): 103 return 104 } 105 106 // Update index for next request 107 index = meta.Index 108 } 109 110 // Handle errors with backoff. Badly behaved blocking calls that returned 111 // a zero index are considered as failures since we need to not get stuck 112 // in a busy loop. 113 wait := 0 * time.Second 114 if err == nil && meta.Index > 0 { 115 failures = 0 116 } else { 117 failures++ 118 wait = backOffWait(failures) 119 } 120 121 if wait > 0 { 122 select { 123 case <-time.After(wait): 124 case <-ctx.Done(): 125 return 126 } 127 } 128 // Sanity check we always request blocking on second pass 129 if index < 1 { 130 index = 1 131 } 132 } 133 } 134 135 func (c *Cache) notifyPollingQuery(ctx context.Context, t string, r Request, correlationID string, ch chan<- UpdateEvent, maxAge time.Duration) { 136 index := uint64(0) 137 failures := uint(0) 138 139 var lastValue interface{} = nil 140 141 for { 142 // Check context hasn't been canceled 143 if ctx.Err() != nil { 144 return 145 } 146 147 // Make the request 148 res, meta, err := c.getWithIndex(t, r, index) 149 150 // Check context hasn't been canceled 151 if ctx.Err() != nil { 152 return 153 } 154 155 // Check for a change in the value or an index change 156 if index < meta.Index || !reflect.DeepEqual(lastValue, res) { 157 u := UpdateEvent{correlationID, res, meta, err} 158 select { 159 case ch <- u: 160 case <-ctx.Done(): 161 return 162 } 163 164 // Update index and lastValue 165 lastValue = res 166 index = meta.Index 167 } 168 169 // Reset or increment failure counter 170 if err == nil { 171 failures = 0 172 } else { 173 failures++ 174 } 175 176 // Determining how long to wait before the next poll is complicated. 177 // First off the happy path and the error path waits are handled distinctly 178 // 179 // Once fetching the data through the cache returns an error (and until a 180 // non-error value is returned) the wait time between each round of the loop 181 // gets controlled by the backOffWait function. Because we would have waited 182 // at least until the age of the cached data was too old the error path should 183 // immediately retry the fetch and backoff on the time as needed for persistent 184 // failures which potentially will wait much longer than the MaxAge of the request 185 // 186 // When on the happy path we just need to fetch from the cache often enough to ensure 187 // that the data is not older than the MaxAge. Therefore after fetching the data from 188 // the cache we can sleep until the age of that data would exceed the MaxAge. Sometimes 189 // this will be for the MaxAge duration (like when only a single notify was executed so 190 // only 1 go routine is keeping the cache updated). Other times this will be some smaller 191 // duration than MaxAge (when multiple notify calls were executed and this go routine just 192 // got data back from the cache that was a cache hit after the other go routine fetched it 193 // without a hit). We cannot just set MustRevalidate on the request and always sleep for MaxAge 194 // as this would eliminate the single-flighting of these requests in the cache and 195 // the efficiencies gained by it. 196 if failures > 0 { 197 198 errWait := backOffWait(failures) 199 select { 200 case <-time.After(errWait): 201 case <-ctx.Done(): 202 return 203 } 204 } else { 205 // Default to immediately re-poll. This only will happen if the data 206 // we just got out of the cache is already too stale 207 pollWait := 0 * time.Second 208 209 // Calculate when the cached data's Age will get too stale and 210 // need to be re-queried. When the data's Age already exceeds the 211 // maxAge the pollWait value is left at 0 to immediately re-poll 212 if meta.Age <= maxAge { 213 pollWait = maxAge - meta.Age 214 } 215 216 // Add a small amount of random jitter to the polling time. One 217 // purpose of the jitter is to ensure that the next time 218 // we fetch from the cache the data will be stale (unless another 219 // notify go routine has updated it while this one is sleeping). 220 // Without this it would be possible to wake up, fetch the data 221 // again where the age of the data is strictly equal to the MaxAge 222 // and then immediately have to re-fetch again. That wouldn't 223 // be terrible but it would expend a bunch more cpu cycles when 224 // we can definitely avoid it. 225 pollWait += lib.RandomStagger(maxAge / 16) 226 227 select { 228 case <-time.After(pollWait): 229 case <-ctx.Done(): 230 return 231 } 232 } 233 } 234 }