github.com/DerekStrickland/consul@v1.4.5/agent/cache-types/connect_ca_leaf.go

package cachetype

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/hashicorp/consul/lib"

	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/connect"
	"github.com/hashicorp/consul/agent/consul"
	"github.com/hashicorp/consul/agent/structs"
)

// ConnectCALeafName is the recommended name for registration.
const ConnectCALeafName = "connect-ca-leaf"

// caChangeJitterWindow is the time over which we spread each round of retries
// when attempting to get a new certificate following a root rotation. It's
// selected as a trade-off: don't make rotation unnecessarily slow on a tiny
// cluster, but don't hammer the servers on a huge cluster unnecessarily hard
// either. Servers rate limit to protect themselves from the expensive crypto
// work, but in practice having 10k+ RPCs all land in the same second will
// cause a major disruption even on large servers due to downloading the
// payloads, parsing msgpack etc. Instead we pick a window that for now is
// fixed, but later might be either user configurable (not nice since it would
// become another hard-to-tune value) or set dynamically by the server based on
// its knowledge of how many certs need to be rotated. Currently the server
// doesn't know that, so we pick something that is reasonable. We err on the
// side of being slower than we need in trivial cases but gentler for large
// deployments. 30s means that even with a cluster of 10k service instances,
// the server only has to cope with ~333 RPCs a second, which shouldn't be too
// bad if it's rate limiting the actual expensive crypto work.
//
// The actual backoff strategy when we are rate limited is to have each cert
// only retry once within each window of this size, at a point in the window
// selected at random. This performs much better than exponential backoff in
// terms of getting things rotated quickly with more predictable load, and so
// fewer rate limited requests. See the full simulation this is based on at
// https://github.com/banks/sim-rate-limit-backoff/blob/master/README.md for
// more detail.
const caChangeJitterWindow = 30 * time.Second
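
// exampleNthRetryWindow is an illustrative sketch only, not referenced by the
// code below: under the strategy described above, the nth rate limited retry
// after a rotation falls at a random point inside the nth
// caChangeJitterWindow-sized window following the rotation start. The real
// retry code in generateNewLeaf jitters the window offset itself, so treat
// this as the idealized schedule the comment describes rather than the exact
// implementation.
func exampleNthRetryWindow(rotationStart time.Time, n int) (start, end time.Time) {
	// Window n covers [rotationStart+n*window, rotationStart+(n+1)*window).
	start = rotationStart.Add(time.Duration(n) * caChangeJitterWindow)
	end = start.Add(caChangeJitterWindow)
	return start, end
}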
// ConnectCALeaf supports fetching and generating Connect leaf
// certificates.
type ConnectCALeaf struct {
	caIndex uint64 // Current index for CA roots

	// rootWatchMu protects access to the rootWatchSubscribers map and
	// rootWatchCancel.
	rootWatchMu sync.Mutex
	// rootWatchSubscribers is a set of chans, one for each currently in-flight
	// Fetch. These chans have root updates delivered from the root watcher.
	rootWatchSubscribers map[chan struct{}]struct{}
	// rootWatchCancel is a func to call to stop the background root watch if
	// any. You must hold rootWatchMu to read (e.g. call) or write the value.
	rootWatchCancel func()

	// testRootWatchStart/StopCount are testing helpers that allow tests to
	// observe the reference counting behavior that governs the shared root
	// watch. It's not exactly pretty to expose internals like this, but it
	// seems cleaner than constructing elaborate and brittle test cases that we
	// can infer correct behavior from, and simpler than trying to probe runtime
	// goroutine traces to infer correct behavior that way. They must be
	// accessed atomically.
	testRootWatchStartCount uint32
	testRootWatchStopCount  uint32

	RPC        RPC          // RPC client for remote requests
	Cache      *cache.Cache // Cache that has CA root certs via ConnectCARoot
	Datacenter string       // This agent's datacenter

	// TestOverrideCAChangeInitialDelay allows overriding the random jitter
	// after a root change with a fixed delay. So far this is only done in
	// tests. If it's zero, random jitter up to caChangeJitterWindow will be
	// used, but if set, it overrides and provides a fixed delay. To essentially
	// disable the delay in tests, set it to 1 nanosecond. We may separately
	// allow users to configure the jitter limit later, but this is different
	// and for tests only, since we need a deterministic time delay in order to
	// test the behavior here fully and deterministically.
	TestOverrideCAChangeInitialDelay time.Duration
}

// fetchState is some additional metadata we store with each cert in the cache
// to track things like expiry and coordinate paced root rotations. It's
// important this doesn't contain any pointer types since we rely on the struct
// being copied to avoid modifying the actual state in the cache entry during
// Fetch. Pointers themselves are OK, but if we pointed to another struct whose
// methods we call or whose fields we modify, that would directly mutate the
// cache and cause problems. We'd need to deep-clone in that case in Fetch
// below. time.Time technically contains a pointer to the Location, but we
// ignore that since all times we get from our wall clock should point to the
// same Location anyway.
type fetchState struct {
	// authorityKeyID is the key ID of the CA root that signed the current cert.
	// This is just to save parsing the whole cert every time we have to check
	// if the root changed.
	authorityKeyID string

	// forceExpireAfter is used to coordinate renewing certs after a CA rotation
	// in a staggered way so that we don't overwhelm the servers.
	forceExpireAfter time.Time

	// activeRootRotationStart is set when the root has changed and we need to
	// get a new cert but haven't got one yet. forceExpireAfter will be set to
	// the next scheduled time we should try our CSR, but this is needed to
	// calculate the retry windows if we are rate limited when we try. See the
	// comment on caChangeJitterWindow above for more.
	activeRootRotationStart time.Time

	// consecutiveRateLimitErrs stores how many rate limit errors we've hit. We
	// use this to choose a new window for the next retry. See the comment on
	// caChangeJitterWindow above for more.
	consecutiveRateLimitErrs int
}
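
// exampleStateCopy is an illustrative sketch only, not referenced below:
// because fetchState is stored and retrieved by value, a Fetch call mutates
// only its local copy until it explicitly hands the new state back in a
// FetchResult.
func exampleStateCopy(last fetchState) fetchState {
	local := last                    // struct assignment copies every field
	local.consecutiveRateLimitErrs++ // the cached entry is unaffected
	return local
}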
// fetchStart is called on each fetch that is about to block and wait for
// changes to the leaf. It subscribes a chan to receive updates from the shared
// root watcher and starts the root watcher if it's not already running.
func (c *ConnectCALeaf) fetchStart(rootUpdateCh chan struct{}) {
	c.rootWatchMu.Lock()
	defer c.rootWatchMu.Unlock()
	// Lazy allocation
	if c.rootWatchSubscribers == nil {
		c.rootWatchSubscribers = make(map[chan struct{}]struct{})
	}
	// Make sure a root watcher is running. We don't only do this on the first
	// request, so that we are more tolerant of errors that could cause the root
	// watcher to fail and exit.
	if c.rootWatchCancel == nil {
		ctx, cancel := context.WithCancel(context.Background())
		c.rootWatchCancel = cancel
		go c.rootWatcher(ctx)
	}
	c.rootWatchSubscribers[rootUpdateCh] = struct{}{}
}

// fetchDone is called when a blocking call exits to unsubscribe from root
// updates and possibly stop the shared root watcher if it's no longer needed.
// Note that typically the root CA is still being watched by clients directly
// and probably by the ProxyConfigManager, so it will stay hot in cache for a
// while; we are just not monitoring it for updates any more.
func (c *ConnectCALeaf) fetchDone(rootUpdateCh chan struct{}) {
	c.rootWatchMu.Lock()
	defer c.rootWatchMu.Unlock()
	delete(c.rootWatchSubscribers, rootUpdateCh)
	if len(c.rootWatchSubscribers) == 0 && c.rootWatchCancel != nil {
		// This was the last request. Stop the root watcher, and clear the
		// cancel func so the next fetchStart knows to start a fresh watcher.
		c.rootWatchCancel()
		c.rootWatchCancel = nil
	}
}
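
// exampleEdgeTriggerNotify is an illustrative sketch of the edge-trigger
// pattern the root watcher below relies on (not referenced by the real code):
// a 1-buffered chan coalesces any number of notifications into "at least one
// pending", and subscribers reload authoritative state from the cache rather
// than from the notification itself, so dropping extra sends is safe.
func exampleEdgeTriggerNotify(ch chan struct{}) {
	select {
	case ch <- struct{}{}:
	default:
		// A notification is already pending; the subscriber will reload the
		// latest state anyway, so this send can be dropped.
	}
}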
// rootWatcher is the shared root watcher that runs in a background goroutine
// while needed by one or more in-flight Fetch calls.
func (c *ConnectCALeaf) rootWatcher(ctx context.Context) {
	atomic.AddUint32(&c.testRootWatchStartCount, 1)
	defer atomic.AddUint32(&c.testRootWatchStopCount, 1)

	ch := make(chan cache.UpdateEvent, 1)
	err := c.Cache.Notify(ctx, ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter: c.Datacenter,
	}, "roots", ch)

	notifyChange := func() {
		c.rootWatchMu.Lock()
		defer c.rootWatchMu.Unlock()

		for ch := range c.rootWatchSubscribers {
			select {
			case ch <- struct{}{}:
			default:
				// Don't block - chans are 1-buffered so they act as an edge
				// trigger, and subscribers reload CA state directly from cache
				// so they never "miss" updates.
			}
		}
	}

	if err != nil {
		// Trigger all in-flight watchers. We don't pass the error, but they
		// will reload from cache, observe the same error, and return it to the
		// caller; or, if it's transient, they will continue and the next Fetch
		// will get us back into the right state. This seems better than
		// busy-loop retrying here, given that almost any error we would see
		// here would also be returned from the cache get this triggers.
		notifyChange()
		return
	}

	var oldRoots *structs.IndexedCARoots
	// Wait for updates to roots or all requests to stop
	for {
		select {
		case <-ctx.Done():
			return
		case e := <-ch:
			// Root response changed in some way. Note this might be the
			// initial fetch.
			if e.Err != nil {
				// See the rationale above about error propagation.
				notifyChange()
				continue
			}

			roots, ok := e.Result.(*structs.IndexedCARoots)
			if !ok {
				// See the rationale above about error propagation.
				notifyChange()
				continue
			}

			// Check that the active root is actually different from the last
			// CA config; there are many reasons the config might have changed
			// without actually updating the CA root that is signing certs in
			// the cluster. The Fetch calls will also validate this, since on
			// the first update here we don't know whether it changed or not,
			// but there is no point waking up all Fetch calls to check this if
			// we know none of them will need to act on this update.
			if oldRoots != nil && oldRoots.ActiveRootID == roots.ActiveRootID {
				continue
			}

			// Distribute the update to all in-flight requests - they will
			// decide whether or not they need to act on it.
			notifyChange()
			oldRoots = roots
		}
	}
}

// calculateSoftExpiry encapsulates our logic for when to renew a cert based on
// its age. It returns a pair of times min, max which makes it easier to test
// the logic without non-deterministic jitter to account for. The caller should
// choose a time randomly in between these.
//
// We want to balance a few factors here:
//  - renew too early and it increases the aggregate CSR rate in the cluster
//  - renew too late and it risks disruption to the service if a transient
//    error prevents the renewal
//  - we want a broad amount of jitter so if there is an outage, we don't end
//    up with all services in sync, causing a thundering herd every renewal
//    period. Broader is better for smoothing requests, but pushes both the
//    earlier and later tradeoffs above.
//
// Somewhat arbitrarily the current strategy looks like this:
//
//          0                              60%             90%
//   Issued [------------------------------|===============|!!!!!] Expires
// 72h TTL: 0                             ~43h            ~65h
//  1h TTL: 0                              36m             54m
//
// Where |===| is the soft renewal period where we jitter for the first attempt
// and |!!!| is the danger zone where we just try immediately.
//
// In the happy path (no outages) the average renewal occurs half way through
// the soft renewal region, i.e. at 75% of the cert lifetime, which is ~54
// hours for a 72 hour cert, or 45 mins for a 1 hour cert.
//
// If we are already in the soft renewal period, we randomly pick a time
// between now and the start of the danger zone.
//
// We pass in now to make testing easier.
func calculateSoftExpiry(now time.Time, cert *structs.IssuedCert) (min time.Time, max time.Time) {
	certLifetime := cert.ValidBefore.Sub(cert.ValidAfter)
	if certLifetime < 10*time.Minute {
		// Shouldn't happen, as we limit to 1 hour shortest elsewhere, but just
		// be defensive against strange times or bugs.
		return now, now
	}

	// Find the 60% mark in the diagram above
	softRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.6))
	hardRenewTime := cert.ValidAfter.Add(time.Duration(float64(certLifetime) * 0.9))

	if now.After(hardRenewTime) {
		// In the hard renew period, or already expired. Renew now!
		return now, now
	}

	if now.After(softRenewTime) {
		// Already in the soft renew period, make now the lower bound for jitter
		softRenewTime = now
	}
	return softRenewTime, hardRenewTime
}
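
// exampleRenewAt is an illustrative sketch (not referenced below) of how a
// caller turns the window from calculateSoftExpiry into a concrete renewal
// time; it mirrors what Fetch does below with lib.RandomStagger.
func exampleRenewAt(now time.Time, cert *structs.IssuedCert) time.Time {
	min, max := calculateSoftExpiry(now, cert)
	// Pick a uniformly random instant inside [min, max].
	return min.Add(lib.RandomStagger(max.Sub(min)))
}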
func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) {
	var result cache.FetchResult

	// Get the correct type
	reqReal, ok := req.(*ConnectCALeafRequest)
	if !ok {
		return result, fmt.Errorf(
			"Internal cache failure: request wrong type: %T", req)
	}

	// Do we already have a cert in the cache?
	var existing *structs.IssuedCert
	// It's really important this is not a pointer type, since otherwise we
	// would set it to point to the actual fetchState in the cache entry below
	// and then would be directly modifying that in the cache entry even when we
	// might later return an error and not update index etc. By being a value,
	// we force a copy.
	var state fetchState
	if opts.LastResult != nil {
		existing, ok = opts.LastResult.Value.(*structs.IssuedCert)
		if !ok {
			return result, fmt.Errorf(
				"Internal cache failure: last value wrong type: %T", opts.LastResult.Value)
		}
		if opts.LastResult.State != nil {
			state, ok = opts.LastResult.State.(fetchState)
			if !ok {
				return result, fmt.Errorf(
					"Internal cache failure: last state wrong type: %T", opts.LastResult.State)
			}
		}
	}

	// Handle a brand new request first, as it's the simplest case.
	if existing == nil {
		return c.generateNewLeaf(reqReal, result)
	}

	// Set up result to mirror the current value in case we time out or hit a
	// rate limit. This allows us to update the state (e.g. for backoff or retry
	// coordination on root change) even if we don't get a new cert.
	result.Value = existing
	result.Index = existing.ModifyIndex
	result.State = state

	// Since state is not a pointer, we can't just set it once in result and
	// then continue to update it later, since we would be updating only our
	// copy. Instead we have a helper function that is used to make sure the
	// state is updated in the result when we return.
	lastResultWithNewState := func() cache.FetchResult {
		return cache.FetchResult{
			Value: existing,
			Index: existing.ModifyIndex,
			State: state,
		}
	}

	// Beyond this point we must only return lastResultWithNewState(), not just
	// result, since otherwise we might "lose" state updates we expect not to.

	// We have a certificate in cache already. Check it's still valid.
	now := time.Now()
	minExpire, maxExpire := calculateSoftExpiry(now, existing)
	expiresAt := minExpire.Add(lib.RandomStagger(maxExpire.Sub(minExpire)))

	// Check if we have been force-expired by a root update that jittered beyond
	// the timeout of the query it was running in.
	if !state.forceExpireAfter.IsZero() && state.forceExpireAfter.Before(expiresAt) {
		expiresAt = state.forceExpireAfter
	}

	if !expiresAt.After(now) {
		// Already expired (or expiring right now), just make a new one right
		// away.
		return c.generateNewLeaf(reqReal, lastResultWithNewState())
	}

	// We are about to block and wait for a change or timeout.

	// Make a chan we can be notified of changes to CA roots on. It must be
	// buffered so we don't miss broadcasts from the root watcher. It is an edge
	// trigger, so a single buffer element is sufficient regardless of whether
	// we consume the updates fast enough, since as soon as we see an element in
	// it, we will reload the latest CA from cache.
	rootUpdateCh := make(chan struct{}, 1)

	// The roots may have changed in between blocking calls. We need to verify
	// that the existing cert was signed by the current root, and if it was, we
	// still want to do the whole jitter thing. We could code that again here,
	// but it's identical to the select case below, so we just trigger our own
	// update chan and let the logic below handle checking if the CA actually
	// changed. In the common case where it didn't, it is a no-op anyway.
	rootUpdateCh <- struct{}{}

	// Subscribe our chan to get root update notifications.
	c.fetchStart(rootUpdateCh)
	defer c.fetchDone(rootUpdateCh)

	// Set up the timeout chan outside the loop so we don't keep bumping the
	// timeout later if we loop around.
	timeoutCh := time.After(opts.Timeout)

	// Set up the initial expiry chan. We may change this if a root update
	// occurs in the loop below.
	expiresCh := time.After(expiresAt.Sub(now))

	// The current cert is valid, so just wait until it expires or we time out.
	for {
		select {
		case <-timeoutCh:
			// We timed out the request with the same cert.
			return lastResultWithNewState(), nil

		case <-expiresCh:
			// Cert expired or was force-expired by a root change.
			return c.generateNewLeaf(reqReal, lastResultWithNewState())

		case <-rootUpdateCh:
			// A root cache change occurred, reload roots from cache.
			roots, err := c.rootsFromCache()
			if err != nil {
				return lastResultWithNewState(), err
			}

			// Handle _possibly_ changed roots. We still need to verify that
			// the new active root is not the same as the one our current cert
			// was signed by, since we can be notified spuriously if we are the
			// first request and the root watcher didn't know about the CA we
			// were signed by. We also rely on this on every request to do the
			// initial check that the current roots are the same ones the
			// current cert was signed by.
			if activeRootHasKey(roots, state.authorityKeyID) {
				// The current active CA is the same one that signed our
				// current cert, so keep waiting for a change.
				continue
			}
			state.activeRootRotationStart = time.Now()

			// The CA root changed. We add some jitter here to avoid a
			// thundering herd. See the docs on the caChangeJitterWindow const.
			delay := lib.RandomStagger(caChangeJitterWindow)
			if c.TestOverrideCAChangeInitialDelay > 0 {
				delay = c.TestOverrideCAChangeInitialDelay
			}
			// Force the cert to be expired after the jitter - the delay above
			// might be longer than we have left on our timeout. We set
			// forceExpireAfter in the cache state so the next request will
			// notice we still need to renew and do it at the right time. This
			// is cleared once a new cert is returned by generateNewLeaf.
			state.forceExpireAfter = state.activeRootRotationStart.Add(delay)
			// If the delay time is within the current timeout, we want to
			// renew the cert as soon as the delay is up. We change the expiry
			// time and chan so that when we loop back around, we'll wait at
			// most delay until generating a new cert.
			if state.forceExpireAfter.Before(expiresAt) {
				expiresAt = state.forceExpireAfter
				expiresCh = time.After(delay)
			}
			continue
		}
	}
}

func activeRootHasKey(roots *structs.IndexedCARoots, currentSigningKeyID string) bool {
	for _, ca := range roots.Roots {
		if ca.Active {
			if ca.SigningKeyID == currentSigningKeyID {
				return true
			}
			// Found the active CA but it has changed
			return false
		}
	}
	// Shouldn't be possible since at least one root should be active.
	return false
}

func (c *ConnectCALeaf) rootsFromCache() (*structs.IndexedCARoots, error) {
	rawRoots, _, err := c.Cache.Get(ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter: c.Datacenter,
	})
	if err != nil {
		return nil, err
	}
	roots, ok := rawRoots.(*structs.IndexedCARoots)
	if !ok {
		return nil, errors.New("invalid RootCA response type")
	}
	return roots, nil
}
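
// exampleRootChanged is an illustrative sketch (not referenced below): the
// rotation check Fetch performs on each root update notification boils down
// to comparing the active root's signing key ID against the key ID that
// signed our current cert.
func exampleRootChanged(c *ConnectCALeaf, state fetchState) (bool, error) {
	roots, err := c.rootsFromCache()
	if err != nil {
		return false, err
	}
	return !activeRootHasKey(roots, state.authorityKeyID), nil
}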
// generateNewLeaf does the actual work of creating a new private key,
// generating a CSR and getting it signed by the servers. The result argument
// represents the last result currently in cache, if any, along with its state.
func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
	result cache.FetchResult) (cache.FetchResult, error) {

	var state fetchState
	if result.State != nil {
		var ok bool
		state, ok = result.State.(fetchState)
		if !ok {
			return result, fmt.Errorf(
				"Internal cache failure: result state wrong type: %T", result.State)
		}
	}

	// Need to look up the RootCAs response to discover the trust domain. This
	// should be a cache hit.
	roots, err := c.rootsFromCache()
	if err != nil {
		return result, err
	}
	if roots.TrustDomain == "" {
		return result, errors.New("cluster has no CA bootstrapped yet")
	}

	// Build the service ID
	serviceID := &connect.SpiffeIDService{
		Host:       roots.TrustDomain,
		Datacenter: req.Datacenter,
		Namespace:  "default",
		Service:    req.Service,
	}

	// Create a new private key
	pk, pkPEM, err := connect.GeneratePrivateKey()
	if err != nil {
		return result, err
	}

	// Create a CSR.
	csr, err := connect.CreateCSR(serviceID, pk)
	if err != nil {
		return result, err
	}

	// Request signing
	var reply structs.IssuedCert
	args := structs.CASignRequest{
		WriteRequest: structs.WriteRequest{Token: req.Token},
		Datacenter:   req.Datacenter,
		CSR:          csr,
	}
	if err := c.RPC.RPC("ConnectCA.Sign", &args, &reply); err != nil {
		if err.Error() == consul.ErrRateLimited.Error() {
			if result.Value == nil {
				// This was a first fetch - we have no good value in cache. In
				// this case we just return the error to the caller rather than
				// rely on the surprising behavior of semi-blocking until the
				// rate limit is appeased or we time out. It's likely the caller
				// isn't expecting this to block since it's an initial fetch.
				// This also massively simplifies this edge case.
				return result, err
			}

			if state.activeRootRotationStart.IsZero() {
				// We hit a rate limit error by chance - for example a cert
				// expired before the root rotation was observed (not triggered
				// by rotation) but while the server is working through high
				// load from a recent rotation. Just pretend there is a rotation
				// and the retry logic here will start jittering and retrying in
				// the same way from now on.
				state.activeRootRotationStart = time.Now()
			}

			// Increment the error count in the state
			state.consecutiveRateLimitErrs++

			delay := lib.RandomStagger(caChangeJitterWindow)
			if c.TestOverrideCAChangeInitialDelay > 0 {
				delay = c.TestOverrideCAChangeInitialDelay
			}

			// Find the start of the next window we can retry in. See the
			// comment on caChangeJitterWindow for details of why we use this
			// strategy.
			windowStart := state.activeRootRotationStart.Add(
				time.Duration(state.consecutiveRateLimitErrs) * delay)

			// Pick a random time in that window
			state.forceExpireAfter = windowStart.Add(delay)

			// Return a result with the existing cert but the new state - the
			// cache will see this as no change. Note that we always have an
			// existing result here due to the nil value check above.
			result.State = state
			return result, nil
		}
		return result, err
	}
	reply.PrivateKeyPEM = pkPEM

	// Reset rotation state
	state.forceExpireAfter = time.Time{}
	state.consecutiveRateLimitErrs = 0
	state.activeRootRotationStart = time.Time{}

	cert, err := connect.ParseCert(reply.CertPEM)
	if err != nil {
		return result, err
	}
	// Set the CA key ID so we can easily tell when an active root has changed.
	state.authorityKeyID = connect.HexString(cert.AuthorityKeyId)

	result.Value = &reply
	// Store the state as a value, not a pointer, so we don't accidentally
	// mutate the cache entry state in Fetch.
	result.State = state
	result.Index = reply.ModifyIndex
	return result, nil
}

func (c *ConnectCALeaf) SupportsBlocking() bool {
	return true
}

// ConnectCALeafRequest is the cache.Request implementation for the
// ConnectCALeaf cache type. This is implemented here and not in structs
// since this is only used for cache-related requests and not forwarded
// directly to any Consul servers.
type ConnectCALeafRequest struct {
	Token         string
	Datacenter    string
	Service       string // Service name, not ID
	MinQueryIndex uint64
	MaxQueryTime  time.Duration
}

func (r *ConnectCALeafRequest) CacheInfo() cache.RequestInfo {
	return cache.RequestInfo{
		Token:      r.Token,
		Key:        r.Service,
		Datacenter: r.Datacenter,
		MinIndex:   r.MinQueryIndex,
		Timeout:    r.MaxQueryTime,
	}
}
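
// exampleGetLeaf is an illustrative usage sketch: this hypothetical helper is
// not part of the real agent code. It shows how a consumer would fetch a leaf
// cert for a service through the agent cache, assuming this type has been
// registered under ConnectCALeafName. The cache calls Fetch above on misses
// and to maintain blocking updates.
func exampleGetLeaf(c *cache.Cache, dc, token, service string) (*structs.IssuedCert, error) {
	raw, _, err := c.Get(ConnectCALeafName, &ConnectCALeafRequest{
		Datacenter: dc,
		Token:      token,
		Service:    service,
	})
	if err != nil {
		return nil, err
	}
	cert, ok := raw.(*structs.IssuedCert)
	if !ok {
		return nil, errors.New("invalid leaf cert response type")
	}
	return cert, nil
}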