github.com/DerekStrickland/consul@v1.4.5/agent/cache-types/connect_ca_leaf.go

package cachetype

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/hashicorp/consul/lib"

	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/connect"
	"github.com/hashicorp/consul/agent/consul"
	"github.com/hashicorp/consul/agent/structs"
)

// ConnectCALeafName is the recommended name for registration.
const ConnectCALeafName = "connect-ca-leaf"

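// registerConnectCALeaf is a minimal, hypothetical sketch of wiring this type
// into the agent cache under the recommended name above. The real registration
// lives in the agent setup code, not in this package, and would normally pass
// tuned cache.RegisterOptions (refresh behavior etc.); the zero-value options
// here are purely for illustration.
func registerConnectCALeaf(c *cache.Cache, rpc RPC, datacenter string) {
	c.RegisterType(ConnectCALeafName, &ConnectCALeaf{
		RPC:        rpc,
		Cache:      c,
		Datacenter: datacenter,
	}, &cache.RegisterOptions{})
}
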
// caChangeJitterWindow is the time over which we spread each round of retries
// when attempting to get a new certificate following a root rotation. It's
// selected to be a trade-off between not making rotation unnecessarily slow on
// a tiny cluster while not hammering the servers on a huge cluster
// unnecessarily hard. Servers rate limit to protect themselves from the
// expensive crypto work, but in practice having 10k+ RPCs all land in the same
// second would cause a major disruption even on large servers due to
// downloading the payloads, parsing msgpack etc. Instead we pick a window that
// for now is fixed but later might be either user configurable (not nice since
// it would become another hard-to-tune value) or set dynamically by the server
// based on its knowledge of how many certs need to be rotated. Currently the
// server doesn't know that so we pick something that is reasonable. We err on
// the side of being slower than we need in trivial cases but gentler for large
// deployments. 30s means that even with a cluster of 10k service instances,
// the server only has to cope with ~333 RPCs a second which shouldn't be too
// bad if it's rate limiting the actual expensive crypto work.
//
// The actual backoff strategy when we are rate limited is to have each cert
// only retry once within each window of this size, at a point in the window
// selected at random. This performs much better than exponential backoff in
// terms of getting things rotated quickly with more predictable load and so
// fewer rate limited requests. See the full simulation this is based on at
// https://github.com/banks/sim-rate-limit-backoff/blob/master/README.md for
// more detail.
const caChangeJitterWindow = 30 * time.Second

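// retryPointForWindow is an illustrative helper (not called anywhere in this
// package) showing the backoff strategy described above: the Nth consecutive
// rate-limited attempt retries once, at a uniformly random point inside the
// Nth jitter window after the rotation started. It assumes, as the callers
// below do, that lib.RandomStagger returns a random duration in [0, d).
func retryPointForWindow(rotationStart time.Time, consecutiveRateLimitErrs int) time.Time {
	windowStart := rotationStart.Add(
		time.Duration(consecutiveRateLimitErrs) * caChangeJitterWindow)
	return windowStart.Add(lib.RandomStagger(caChangeJitterWindow))
}
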
// ConnectCALeaf supports fetching and generating Connect leaf
// certificates.
type ConnectCALeaf struct {
	caIndex uint64 // Current index for CA roots

	// rootWatchMu protects access to the rootWatchSubscribers map and
	// rootWatchCancel
	rootWatchMu sync.Mutex
	// rootWatchSubscribers is a set of chans, one for each currently in-flight
	// Fetch. These chans have root updates delivered from the root watcher.
	rootWatchSubscribers map[chan struct{}]struct{}
	// rootWatchCancel is a func to call to stop the background root watch if any.
	// You must hold rootWatchMu to read (e.g. call) or write the value.
	rootWatchCancel func()

	// testRootWatchStart/StopCount are testing helpers that allow tests to
	// observe the reference counting behavior that governs the shared root watch.
	// It's not exactly pretty to expose internals like this, but it seems cleaner
	// than constructing elaborate and brittle test cases from which correct
	// behavior has to be inferred, and simpler than trying to probe runtime
	// goroutine traces to infer correct behavior that way. They must be accessed
	// atomically.
	testRootWatchStartCount uint32
	testRootWatchStopCount  uint32

	RPC        RPC          // RPC client for remote requests
	Cache      *cache.Cache // Cache that has CA root certs via ConnectCARoot
	Datacenter string       // This agent's datacenter

	// TestOverrideCAChangeInitialDelay allows overriding the random jitter after a
	// root change with a fixed delay. So far this is only done in tests. If it's
	// zero, the caChangeJitterWindow maximum jitter will be used, but if set, it
	// overrides and provides a fixed delay. To essentially disable the delay in
	// tests they can set it to 1 nanosecond. We may separately allow users to
	// configure the jitter limit later, but this is different and for tests only
	// since we need to set a deterministic time delay in order to test the
	// behavior here fully and deterministically.
	TestOverrideCAChangeInitialDelay time.Duration
}

// fetchState is some additional metadata we store with each cert in the cache
// to track things like expiry and coordinate paced root rotations. It's
// important this doesn't contain any pointer types since we rely on the struct
// being copied to avoid modifying the actual state in the cache entry during
// Fetch. Pointers themselves are OK, but if we pointed to another struct and
// called a method on it or modified it in some way, that would directly mutate
// the cache and cause problems. We'd need to deep-clone in that case in Fetch
// below. time.Time technically contains a pointer to the Location but we
// ignore that since all times we get from our wall clock should point to the
// same Location anyway.
type fetchState struct {
	// authorityKeyID is the key ID of the CA root that signed the current cert.
	// This is just to save parsing the whole cert every time we have to check if
	// the root changed.
	authorityKeyID string

	// forceExpireAfter is used to coordinate renewing certs after a CA rotation
	// in a staggered way so that we don't overwhelm the servers.
	forceExpireAfter time.Time

	// activeRootRotationStart is set when the root has changed and we need to get
	// a new cert but haven't got one yet. forceExpireAfter will be set to the
	// next scheduled time we should try our CSR, but this is needed to calculate
	// the retry windows if we are rate limited when we try. See comment on
	// caChangeJitterWindow above for more.
	activeRootRotationStart time.Time

	// consecutiveRateLimitErrs stores how many rate limit errors we've hit. We
	// use this to choose a new window for the next retry. See comment on
	// caChangeJitterWindow above for more.
	consecutiveRateLimitErrs int
}

// fetchStart is called on each fetch that is about to block and wait for
// changes to the leaf. It subscribes a chan to receive updates from the shared
// root watcher and starts the root watcher if it's not already running.
func (c *ConnectCALeaf) fetchStart(rootUpdateCh chan struct{}) {
	c.rootWatchMu.Lock()
	defer c.rootWatchMu.Unlock()
	// Lazy allocation
	if c.rootWatchSubscribers == nil {
		c.rootWatchSubscribers = make(map[chan struct{}]struct{})
	}
	// Make sure a root watcher is running. We don't do this only on the first
	// request so that we are more tolerant of errors that could cause the root
	// watcher to fail and exit.
	if c.rootWatchCancel == nil {
		ctx, cancel := context.WithCancel(context.Background())
		c.rootWatchCancel = cancel
		go c.rootWatcher(ctx)
	}
	c.rootWatchSubscribers[rootUpdateCh] = struct{}{}
}

// fetchDone is called when a blocking call exits to unsubscribe from root
// updates and possibly stop the shared root watcher if it's no longer needed.
// Note that typically the root CA is still being watched by clients directly
// and probably by the ProxyConfigManager, so it will stay hot in cache for a
// while; we are just not monitoring it for updates any more.
func (c *ConnectCALeaf) fetchDone(rootUpdateCh chan struct{}) {
	c.rootWatchMu.Lock()
	defer c.rootWatchMu.Unlock()
	delete(c.rootWatchSubscribers, rootUpdateCh)
	if len(c.rootWatchSubscribers) == 0 && c.rootWatchCancel != nil {
		// This was the last request. Stop the root watcher.
		c.rootWatchCancel()
		// Clear the cancel func so the next fetchStart knows it needs to start a
		// fresh root watcher rather than assuming one is still running.
		c.rootWatchCancel = nil
	}
}

// rootWatcher is the shared rootWatcher that runs in a background goroutine
// while needed by one or more inflight Fetch calls.
func (c *ConnectCALeaf) rootWatcher(ctx context.Context) {
	atomic.AddUint32(&c.testRootWatchStartCount, 1)
	defer atomic.AddUint32(&c.testRootWatchStopCount, 1)

	ch := make(chan cache.UpdateEvent, 1)
	err := c.Cache.Notify(ctx, ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter: c.Datacenter,
	}, "roots", ch)

	notifyChange := func() {
		c.rootWatchMu.Lock()
		defer c.rootWatchMu.Unlock()

		for ch := range c.rootWatchSubscribers {
			select {
			case ch <- struct{}{}:
			default:
				// Don't block - chans are 1-buffered so they act as an edge trigger;
				// subscribers reload CA state directly from cache so they never
				// "miss" updates.
			}
		}
	}

	if err != nil {
		// Trigger all inflight watchers. We don't pass the error, but they will
		// reload from cache and observe the same error and return it to the caller,
		// or if it's transient, will continue and the next Fetch will get us back
		// into the right state. Seems better than busy loop-retrying here given
		// that almost any error we would see here would also be returned from the
		// cache get that this will trigger.
		notifyChange()
		return
	}

	var oldRoots *structs.IndexedCARoots
	// Wait for updates to roots or all requests to stop
	for {
		select {
		case <-ctx.Done():
			return
		case e := <-ch:
			// Root response changed in some way. Note this might be the initial
			// fetch.
			if e.Err != nil {
				// See above rationale about the error propagation
				notifyChange()
				continue
			}

			roots, ok := e.Result.(*structs.IndexedCARoots)
			if !ok {
				// See above rationale about the error propagation
				notifyChange()
				continue
			}

			// Check that the active root is actually different from the last CA
			// config; there are many reasons the config might have changed without
			// actually updating the CA root that is signing certs in the cluster.
			// The Fetch calls will also validate this since on the first update here
			// we don't know whether it changed or not, but there is no point waking
			// up all Fetch calls to check this if we know none of them will need to
			// act on this update.
			if oldRoots != nil && oldRoots.ActiveRootID == roots.ActiveRootID {
				continue
			}

			// Distribute the update to all inflight requests - they will decide
			// whether or not they need to act on it.
			notifyChange()
			oldRoots = roots
		}
	}
}

// calculateSoftExpiry encapsulates our logic for when to renew a cert based on
// its age. It returns a pair of times min, max which makes it easier to test
// the logic without non-deterministic jitter to account for. The caller should
// choose a time randomly in between these.
//
// We want to balance a few factors here:
//   - renew too early and it increases the aggregate CSR rate in the cluster
//   - renew too late and it risks disruption to the service if a transient
//     error prevents the renewal
//   - we want a broad amount of jitter so if there is an outage, we don't end
//     up with all services in sync and causing a thundering herd every
//     renewal period. Broader is better for smoothing requests but pushes
//     both earlier and later tradeoffs above.
//
// Somewhat arbitrarily the current strategy looks like this:
//
//          0                              60%             90%
//   Issued [------------------------------|===============|!!!!!] Expires
// 72h TTL: 0                             ~43h            ~65h
//  1h TTL: 0                              36m             54m
//
// Where |===| is the soft renewal period where we jitter for the first attempt
// and |!!!| is the danger zone where we just try immediately.
//
// In the happy path (no outages) the average renewal occurs halfway through
// the soft renewal region, or at 75% of the cert lifetime, which is ~54 hours
// for a 72 hour cert, or 45 mins for a 1 hour cert.
//
// If we are already in the soft renewal period, we randomly pick a time between
// now and the start of the danger zone.
//
// We pass in now to make testing easier.
func calculateSoftExpiry(now time.Time, cert *structs.IssuedCert) (min time.Time, max time.Time) {
	certLifetime := cert.ValidBefore.Sub(cert.ValidAfter)
	if certLifetime < 10*time.Minute {
		// Shouldn't happen as we limit to a 1 hour minimum TTL elsewhere, but be
		// defensive against strange times or bugs.
		return now, now
	}

	// Find the 60% mark in the diagram above
	softRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.6))
	hardRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.9))

	if now.After(hardRenewTime) {
		// In the hard renew period, or already expired. Renew now!
		return now, now
	}

	if now.After(softRenewTime) {
		// Already in the soft renew period, make now the lower bound for jitter
		softRenewTime = now
	}
	return softRenewTime, hardRenewTime
}

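// renewalInstant is an illustrative sketch only (Fetch below inlines the same
// logic) of how a caller turns the soft expiry bounds into a concrete renewal
// time: pick uniformly at random between min and max so renewals spread out
// across instances. lib.RandomStagger returns a duration in [0, d), and when
// the cert is already past the hard bound min == max == now, so this renews
// immediately.
func renewalInstant(now time.Time, cert *structs.IssuedCert) time.Time {
	min, max := calculateSoftExpiry(now, cert)
	return min.Add(lib.RandomStagger(max.Sub(min)))
}
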
func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) {
	var result cache.FetchResult

	// Get the correct type
	reqReal, ok := req.(*ConnectCALeafRequest)
	if !ok {
		return result, fmt.Errorf(
			"Internal cache failure: request wrong type: %T", req)
	}

	// Do we already have a cert in the cache?
	var existing *structs.IssuedCert
	// Really important this is not a pointer type since otherwise we would set
	// it to point to the actual fetchState in the cache entry below and then
	// would be directly modifying that in the cache entry even when we might
	// later return an error and not update index etc. By being a value, we force
	// a copy.
	var state fetchState
	if opts.LastResult != nil {
		existing, ok = opts.LastResult.Value.(*structs.IssuedCert)
		if !ok {
			return result, fmt.Errorf(
				"Internal cache failure: last value wrong type: %T", opts.LastResult.Value)
		}
		if opts.LastResult.State != nil {
			state, ok = opts.LastResult.State.(fetchState)
			if !ok {
				return result, fmt.Errorf(
					"Internal cache failure: last state wrong type: %T", opts.LastResult.State)
			}
		}
	}

	// Handle a brand new request first as it's simplest.
	if existing == nil {
		return c.generateNewLeaf(reqReal, result)
	}

	// Set up result to mirror the current value in case we time out or hit a
	// rate limit. This allows us to update the state (e.g. for backoff or retry
	// coordination on root change) even if we don't get a new cert.
	result.Value = existing
	result.Index = existing.ModifyIndex
	result.State = state

	// Since state is not a pointer, we can't just set it once in result and then
	// continue to update it later since we would be updating only our copy.
	// Instead we have a helper function that is used to make sure the state is
	// updated in the result when we return.
	lastResultWithNewState := func() cache.FetchResult {
		return cache.FetchResult{
			Value: existing,
			Index: existing.ModifyIndex,
			State: state,
		}
	}

	// Beyond this point we need to return lastResultWithNewState() rather than
	// result, since otherwise we might "lose" state updates we expect to keep.

	// We have a certificate in cache already. Check it's still valid.
	now := time.Now()
	minExpire, maxExpire := calculateSoftExpiry(now, existing)
	expiresAt := minExpire.Add(lib.RandomStagger(maxExpire.Sub(minExpire)))

	// Check if we have been force-expired by a root update that jittered beyond
	// the timeout of the query it was running.
	if !state.forceExpireAfter.IsZero() && state.forceExpireAfter.Before(expiresAt) {
		expiresAt = state.forceExpireAfter
	}

	if expiresAt == now || expiresAt.Before(now) {
		// Already expired, just make a new one right away
		return c.generateNewLeaf(reqReal, lastResultWithNewState())
	}

	// We are about to block and wait for a change or timeout.

	// Make a chan we can be notified of changes to CA roots on. It must be
	// buffered so we don't miss broadcasts from the root watcher. It is an edge
	// trigger so a single buffer element is sufficient regardless of whether we
	// consume the updates fast enough, since as soon as we see an element in it,
	// we will reload the latest CA from cache.
	rootUpdateCh := make(chan struct{}, 1)

	// The roots may have changed in between blocking calls. We need to verify
	// that the existing cert was signed by the current root; if it wasn't, we
	// still want to do the whole jitter thing before renewing. We could code
	// that again here but it's identical to the select case below, so we just
	// trigger our own update chan and let the logic below handle checking if the
	// CA actually changed; in the common case where it didn't, it is a no-op
	// anyway.
	rootUpdateCh <- struct{}{}

	// Subscribe our chan to get root update notification.
	c.fetchStart(rootUpdateCh)
	defer c.fetchDone(rootUpdateCh)

	// Set up the timeout chan outside the loop so we don't keep bumping the
	// timeout later if we loop around.
	timeoutCh := time.After(opts.Timeout)

	// Set up the initial expiry chan. We may change this if a root update occurs
	// in the loop below.
	expiresCh := time.After(expiresAt.Sub(now))

	// Current cert is valid so just wait until it expires or we time out.
	for {
		select {
		case <-timeoutCh:
			// We timed out the request with same cert.
			return lastResultWithNewState(), nil

		case <-expiresCh:
			// Cert expired or was force-expired by a root change.
			return c.generateNewLeaf(reqReal, lastResultWithNewState())

		case <-rootUpdateCh:
			// A root cache change occurred, reload roots from cache.
			roots, err := c.rootsFromCache()
			if err != nil {
				return lastResultWithNewState(), err
			}

			// Handle _possibly_ changed roots. We still need to verify the new active
			// root is not the same as the one our current cert was signed by, since
			// we can be notified spuriously - for example on the first pass through
			// this loop the root watcher doesn't know which CA our cert was signed
			// by. We also rely on this on every request to do the initial check that
			// the current roots are the same ones the current cert was signed by.
			if activeRootHasKey(roots, state.authorityKeyID) {
				// Current active CA is the same one that signed our current cert so
				// keep waiting for a change.
				continue
			}
			state.activeRootRotationStart = time.Now()

			// CA root changed. We add some jitter here to avoid a thundering herd.
			// See docs on the caChangeJitterWindow const.
			delay := lib.RandomStagger(caChangeJitterWindow)
			if c.TestOverrideCAChangeInitialDelay > 0 {
				delay = c.TestOverrideCAChangeInitialDelay
			}
			// Force the cert to be expired after the jitter - the delay above might
			// be longer than we have left on our timeout. We set forceExpireAfter in
			// the cache state so the next request will notice we still need to renew
			// and do it at the right time. This is cleared once a new cert is
			// returned by generateNewLeaf.
			state.forceExpireAfter = state.activeRootRotationStart.Add(delay)
			// If the delay time is within the current timeout, we want to renew as
			// soon as it's up. We change the expire time and chan so that when we
			// loop back around, we'll wait at most delay until generating a new cert.
			if state.forceExpireAfter.Before(expiresAt) {
				expiresAt = state.forceExpireAfter
				expiresCh = time.After(delay)
			}
			continue
		}
	}
}

func activeRootHasKey(roots *structs.IndexedCARoots, currentSigningKeyID string) bool {
	for _, ca := range roots.Roots {
		if ca.Active {
			if ca.SigningKeyID == currentSigningKeyID {
				return true
			}
			// Found the active CA but it has changed
			return false
		}
	}
	// Shouldn't be possible since at least one root should be active.
	return false
}

func (c *ConnectCALeaf) rootsFromCache() (*structs.IndexedCARoots, error) {
	rawRoots, _, err := c.Cache.Get(ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter: c.Datacenter,
	})
	if err != nil {
		return nil, err
	}
	roots, ok := rawRoots.(*structs.IndexedCARoots)
	if !ok {
		return nil, errors.New("invalid RootCA response type")
	}
	return roots, nil
}

// generateNewLeaf does the actual work of creating a new private key,
// generating a CSR and getting it signed by the servers. The result argument
// represents the last result currently in cache, if any, along with its state.
func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
	result cache.FetchResult) (cache.FetchResult, error) {

	var state fetchState
	if result.State != nil {
		var ok bool
		state, ok = result.State.(fetchState)
		if !ok {
			return result, fmt.Errorf(
				"Internal cache failure: result state wrong type: %T", result.State)
		}
	}

	// Need to look up the RootCAs response to discover the trust domain. This
	// should be a cache hit.
	roots, err := c.rootsFromCache()
	if err != nil {
		return result, err
	}
	if roots.TrustDomain == "" {
		return result, errors.New("cluster has no CA bootstrapped yet")
	}

	// Build the service ID
	serviceID := &connect.SpiffeIDService{
		Host:       roots.TrustDomain,
		Datacenter: req.Datacenter,
		Namespace:  "default",
		Service:    req.Service,
	}

	// Create a new private key
	pk, pkPEM, err := connect.GeneratePrivateKey()
	if err != nil {
		return result, err
	}

	// Create a CSR.
	csr, err := connect.CreateCSR(serviceID, pk)
	if err != nil {
		return result, err
	}

	// Request signing
	var reply structs.IssuedCert
	args := structs.CASignRequest{
		WriteRequest: structs.WriteRequest{Token: req.Token},
		Datacenter:   req.Datacenter,
		CSR:          csr,
	}
	if err := c.RPC.RPC("ConnectCA.Sign", &args, &reply); err != nil {
		if err.Error() == consul.ErrRateLimited.Error() {
			if result.Value == nil {
				// This was a first fetch - we have no good value in cache. In this case
				// we just return the error to the caller rather than rely on the
				// surprising behavior of semi-blocking until the rate limit eases or we
				// time out. It's likely the caller isn't expecting this to block since
				// it's an initial fetch. This also massively simplifies this edge case.
				return result, err
			}

			if state.activeRootRotationStart.IsZero() {
				// We hit a rate limit error by chance - for example a cert expired
				// before the root rotation was observed (not triggered by rotation) but
				// while the server is working through high load from a recent rotation.
				// Just pretend there is a rotation and the retry logic here will start
				// jittering and retrying in the same way from now.
				state.activeRootRotationStart = time.Now()
			}

			// Increment the errors in the state
			state.consecutiveRateLimitErrs++

			delay := lib.RandomStagger(caChangeJitterWindow)
			if c.TestOverrideCAChangeInitialDelay > 0 {
				delay = c.TestOverrideCAChangeInitialDelay
			}

			// Find the start of the next window we can retry in. See comment on
			// caChangeJitterWindow for details of why we use this strategy.
			windowStart := state.activeRootRotationStart.Add(
				time.Duration(state.consecutiveRateLimitErrs) * delay)

			// Pick a random time in that window
			state.forceExpireAfter = windowStart.Add(delay)

			// Return a result with the existing cert but the new state - the cache
			// will see this as no change. Note that we always have an existing result
			// here due to the nil value check above.
			result.State = state
			return result, nil
		}
		return result, err
	}
	reply.PrivateKeyPEM = pkPEM

	// Reset rotation state
	state.forceExpireAfter = time.Time{}
	state.consecutiveRateLimitErrs = 0
	state.activeRootRotationStart = time.Time{}

	cert, err := connect.ParseCert(reply.CertPEM)
	if err != nil {
		return result, err
	}
	// Set the CA key ID so we can easily tell when the active root has changed.
	state.authorityKeyID = connect.HexString(cert.AuthorityKeyId)

	result.Value = &reply
	// Store value not pointer so we don't accidentally mutate the cache entry
	// state in Fetch.
	result.State = state
	result.Index = reply.ModifyIndex
	return result, nil
}

func (c *ConnectCALeaf) SupportsBlocking() bool {
	return true
}

// ConnectCALeafRequest is the cache.Request implementation for the
// ConnectCALeaf cache type. This is implemented here and not in structs
// since this is only used for cache-related requests and not forwarded
// directly to any Consul servers.
type ConnectCALeafRequest struct {
	Token         string
	Datacenter    string
	Service       string // Service name, not ID
	MinQueryIndex uint64
	MaxQueryTime  time.Duration
}

func (r *ConnectCALeafRequest) CacheInfo() cache.RequestInfo {
	return cache.RequestInfo{
		Token:      r.Token,
		Key:        r.Service,
		Datacenter: r.Datacenter,
		MinIndex:   r.MinQueryIndex,
		Timeout:    r.MaxQueryTime,
	}
}
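
// exampleLeafGet is a minimal, hypothetical usage sketch: how an agent-side
// caller could fetch a leaf cert through the cache using this request type.
// The datacenter and service name are placeholders; real callers construct the
// request from agent and proxy configuration elsewhere, outside this package.
func exampleLeafGet(c *cache.Cache, token string) (*structs.IssuedCert, error) {
	raw, _, err := c.Get(ConnectCALeafName, &ConnectCALeafRequest{
		Token:      token,
		Datacenter: "dc1",
		Service:    "web",
	})
	if err != nil {
		return nil, err
	}
	cert, ok := raw.(*structs.IssuedCert)
	if !ok {
		return nil, errors.New("invalid leaf cert response type")
	}
	return cert, nil
}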