github.com/outbrain/consul@v1.4.5/agent/cache/watch.go

github.com/outbrain/consul@v1.4.5/agent/cache/watch.go (about)

     1  package cache
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"reflect"
     7  	"time"
     8  
     9  	"github.com/hashicorp/consul/lib"
    10  )
    11  
    12  // UpdateEvent is a struct summarizing an update to a cache entry. It is the
        // value the notify loops send on the channel passed to Notify.
    13  type UpdateEvent struct {
    14  	// CorrelationID is used by the Notify API to allow correlation of updates
    15  	// with specific requests. We could return the full request object and
    16  	// cachetype for consumers to match against the calls they made but in
    17  	// practice it's cleaner for them to choose the minimal necessary unique
    18  	// identifier given the set of things they are watching. They might even
    19  	// choose to assign random IDs for example.
    20  	CorrelationID string
        	// Result is the value returned by the cache fetch that produced this
        	// event.
    21  	Result        interface{}
        	// Meta is the ResultMeta returned by that same fetch.
    22  	Meta          ResultMeta
        	// Err is the error returned by that same fetch. The notify loops treat
        	// a nil Err as a successful fetch.
    23  	Err           error
    24  }
    25  
    26  // Notify registers a desire to be updated about changes to a cache result.
    27  //
    28  // It is a helper that abstracts code from performing their own "blocking" query
    29  // logic against a cache key to watch for changes and to maintain the key in
    30  // cache actively. It will continue to perform blocking Get requests until the
    31  // context is canceled.
    32  //
    33  // The passed context must be canceled or timeout in order to free resources
    34  // and stop maintaining the value in cache. Typically request-scoped resources
    35  // do this but if a long-lived context like context.Background is used, then the
    36  // caller must arrange for it to be canceled when the watch is no longer
    37  // needed.
    38  //
    39  // The passed chan may be buffered or unbuffered, if the caller doesn't consume
    40  // fast enough it will block the notification loop. When the chan is later
    41  // drained, watching resumes correctly. If the pause is longer than the
    42  // cachetype's TTL, the result might be removed from the local cache. Even in
    43  // this case though when the chan is drained again, the new Get will re-fetch
    44  // the entry from servers and resume notification behavior transparently.
    45  //
    46  // The chan is passed in to allow multiple cached results to be watched by a
    47  // single consumer without juggling extra goroutines per watch. The
    48  // correlationID is opaque and will be returned in all UpdateEvents generated by
    49  // result of watching the specified request so the caller can set this to any
    50  // value that allows them to disambiguate between events in the returned chan
    51  // when sharing a chan between multiple cache entries. If the chan is closed,
    52  // the notify loop will terminate.
    53  func (c *Cache) Notify(ctx context.Context, t string, r Request,
    54  	correlationID string, ch chan<- UpdateEvent) error {
    55  
    56  	// Get the type that we're fetching
    57  	c.typesLock.RLock()
    58  	tEntry, ok := c.types[t]
    59  	c.typesLock.RUnlock()
    60  	if !ok {
    61  		return fmt.Errorf("unknown type in cache: %s", t)
    62  	}
    63  	if tEntry.Type.SupportsBlocking() {
    64  		go c.notifyBlockingQuery(ctx, t, r, correlationID, ch)
    65  	} else {
    66  		info := r.CacheInfo()
    67  		if info.MaxAge == 0 {
    68  			return fmt.Errorf("Cannot use Notify for polling cache types without specifying the MaxAge")
    69  		}
    70  		go c.notifyPollingQuery(ctx, t, r, correlationID, ch, info.MaxAge)
    71  	}
    72  
    73  	return nil
    74  }
    75  
    76  func (c *Cache) notifyBlockingQuery(ctx context.Context, t string, r Request, correlationID string, ch chan<- UpdateEvent) {
    77  	// Always start at 0 index to deliver the initial (possibly currently cached
    78  	// value).
    79  	index := uint64(0)
    80  	failures := uint(0)
    81  
    82  	for {
    83  		// Check context hasn't been canceled
    84  		if ctx.Err() != nil {
    85  			return
    86  		}
    87  
    88  		// Blocking request
    89  		res, meta, err := c.getWithIndex(t, r, index)
    90  
    91  		// Check context hasn't been canceled
    92  		if ctx.Err() != nil {
    93  			return
    94  		}
    95  
    96  		// Check the index of the value returned in the cache entry to be sure it
    97  		// changed
    98  		if index < meta.Index {
    99  			u := UpdateEvent{correlationID, res, meta, err}
   100  			select {
   101  			case ch <- u:
   102  			case <-ctx.Done():
   103  				return
   104  			}
   105  
   106  			// Update index for next request
   107  			index = meta.Index
   108  		}
   109  
   110  		// Handle errors with backoff. Badly behaved blocking calls that returned
   111  		// a zero index are considered as failures since we need to not get stuck
   112  		// in a busy loop.
   113  		wait := 0 * time.Second
   114  		if err == nil && meta.Index > 0 {
   115  			failures = 0
   116  		} else {
   117  			failures++
   118  			wait = backOffWait(failures)
   119  		}
   120  
   121  		if wait > 0 {
   122  			select {
   123  			case <-time.After(wait):
   124  			case <-ctx.Done():
   125  				return
   126  			}
   127  		}
   128  		// Sanity check we always request blocking on second pass
   129  		if index < 1 {
   130  			index = 1
   131  		}
   132  	}
   133  }
   134  
   135  func (c *Cache) notifyPollingQuery(ctx context.Context, t string, r Request, correlationID string, ch chan<- UpdateEvent, maxAge time.Duration) {
   136  	index := uint64(0)
   137  	failures := uint(0)
   138  
   139  	var lastValue interface{} = nil
   140  
   141  	for {
   142  		// Check context hasn't been canceled
   143  		if ctx.Err() != nil {
   144  			return
   145  		}
   146  
   147  		// Make the request
   148  		res, meta, err := c.getWithIndex(t, r, index)
   149  
   150  		// Check context hasn't been canceled
   151  		if ctx.Err() != nil {
   152  			return
   153  		}
   154  
   155  		// Check for a change in the value or an index change
   156  		if index < meta.Index || !reflect.DeepEqual(lastValue, res) {
   157  			u := UpdateEvent{correlationID, res, meta, err}
   158  			select {
   159  			case ch <- u:
   160  			case <-ctx.Done():
   161  				return
   162  			}
   163  
   164  			// Update index and lastValue
   165  			lastValue = res
   166  			index = meta.Index
   167  		}
   168  
   169  		// Reset or increment failure counter
   170  		if err == nil {
   171  			failures = 0
   172  		} else {
   173  			failures++
   174  		}
   175  
   176  		// Determining how long to wait before the next poll is complicated.
   177  		// First off the happy path and the error path waits are handled distinctly
   178  		//
   179  		// Once fetching the data through the cache returns an error (and until a
   180  		// non-error value is returned) the wait time between each round of the loop
   181  		// gets controlled by the backOffWait function. Because we would have waited
   182  		// at least until the age of the cached data was too old the error path should
   183  		// immediately retry the fetch and backoff on the time as needed for persistent
   184  		// failures which potentially will wait much longer than the MaxAge of the request
   185  		//
   186  		// When on the happy path we just need to fetch from the cache often enough to ensure
   187  		// that the data is not older than the MaxAge. Therefore after fetching the data from
   188  		// the cache we can sleep until the age of that data would exceed the MaxAge. Sometimes
   189  		// this will be for the MaxAge duration (like when only a single notify was executed so
   190  		// only 1 go routine is keeping the cache updated). Other times this will be some smaller
   191  		// duration than MaxAge (when multiple notify calls were executed and this go routine just
   192  		// got data back from the cache that was a cache hit after the other go routine fetched it
   193  		// without a hit). We cannot just set MustRevalidate on the request and always sleep for MaxAge
   194  		// as this would eliminate the single-flighting of these requests in the cache and
   195  		// the efficiencies gained by it.
   196  		if failures > 0 {
   197  
   198  			errWait := backOffWait(failures)
   199  			select {
   200  			case <-time.After(errWait):
   201  			case <-ctx.Done():
   202  				return
   203  			}
   204  		} else {
   205  			// Default to immediately re-poll. This only will happen if the data
   206  			// we just got out of the cache is already too stale
   207  			pollWait := 0 * time.Second
   208  
   209  			// Calculate when the cached data's Age will get too stale and
   210  			// need to be re-queried. When the data's Age already exceeds the
   211  			// maxAge the pollWait value is left at 0 to immediately re-poll
   212  			if meta.Age <= maxAge {
   213  				pollWait = maxAge - meta.Age
   214  			}
   215  
   216  			// Add a small amount of random jitter to the polling time. One
   217  			// purpose of the jitter is to ensure that the next time
   218  			// we fetch from the cache the data will be stale (unless another
   219  			// notify go routine has updated it while this one is sleeping).
   220  			// Without this it would be possible to wake up, fetch the data
   221  			// again where the age of the data is strictly equal to the MaxAge
   222  			// and then immediately have to re-fetch again. That wouldn't
   223  			// be terrible but it would expend a bunch more cpu cycles when
   224  			// we can definitely avoid it.
   225  			pollWait += lib.RandomStagger(maxAge / 16)
   226  
   227  			select {
   228  			case <-time.After(pollWait):
   229  			case <-ctx.Done():
   230  				return
   231  			}
   232  		}
   233  	}
   234  }