github.com/DerekStrickland/consul@v1.4.5/agent/cache-types/connect_ca_leaf_test.go (about) 1 package cachetype 2 3 import ( 4 "fmt" 5 "sync/atomic" 6 "testing" 7 "time" 8 9 "github.com/hashicorp/consul/testutil/retry" 10 11 "github.com/hashicorp/consul/agent/cache" 12 "github.com/hashicorp/consul/agent/connect" 13 "github.com/hashicorp/consul/agent/consul" 14 "github.com/hashicorp/consul/agent/structs" 15 "github.com/stretchr/testify/mock" 16 "github.com/stretchr/testify/require" 17 ) 18 19 func TestCalculateSoftExpire(t *testing.T) { 20 tests := []struct { 21 name string 22 now string 23 issued string 24 lifetime time.Duration 25 wantMin string 26 wantMax string 27 }{ 28 { 29 name: "72h just issued", 30 now: "2018-01-01 00:00:01", 31 issued: "2018-01-01 00:00:00", 32 lifetime: 72 * time.Hour, 33 // Should jitter between 60% and 90% of the lifetime which is 43.2/64.8 34 // hours after issued 35 wantMin: "2018-01-02 19:12:00", 36 wantMax: "2018-01-03 16:48:00", 37 }, 38 { 39 name: "72h in renew range", 40 // This time should be inside the renewal range. 41 now: "2018-01-02 20:00:20", 42 issued: "2018-01-01 00:00:00", 43 lifetime: 72 * time.Hour, 44 // Min should be the "now" time 45 wantMin: "2018-01-02 20:00:20", 46 wantMax: "2018-01-03 16:48:00", 47 }, 48 { 49 name: "72h in hard renew", 50 // This time should be inside the renewal range. 51 now: "2018-01-03 18:00:00", 52 issued: "2018-01-01 00:00:00", 53 lifetime: 72 * time.Hour, 54 // Min and max should both be the "now" time 55 wantMin: "2018-01-03 18:00:00", 56 wantMax: "2018-01-03 18:00:00", 57 }, 58 { 59 name: "72h expired", 60 // This time is after expiry 61 now: "2018-01-05 00:00:00", 62 issued: "2018-01-01 00:00:00", 63 lifetime: 72 * time.Hour, 64 // Min and max should both be the "now" time 65 wantMin: "2018-01-05 00:00:00", 66 wantMax: "2018-01-05 00:00:00", 67 }, 68 { 69 name: "1h just issued", 70 now: "2018-01-01 00:00:01", 71 issued: "2018-01-01 00:00:00", 72 lifetime: 1 * time.Hour, 73 // Should jitter between 60% and 90% of the lifetime which is 36/54 mins 74 // hours after issued 75 wantMin: "2018-01-01 00:36:00", 76 wantMax: "2018-01-01 00:54:00", 77 }, 78 { 79 name: "1h in renew range", 80 // This time should be inside the renewal range. 81 now: "2018-01-01 00:40:00", 82 issued: "2018-01-01 00:00:00", 83 lifetime: 1 * time.Hour, 84 // Min should be the "now" time 85 wantMin: "2018-01-01 00:40:00", 86 wantMax: "2018-01-01 00:54:00", 87 }, 88 { 89 name: "1h in hard renew", 90 // This time should be inside the renewal range. 91 now: "2018-01-01 00:55:00", 92 issued: "2018-01-01 00:00:00", 93 lifetime: 1 * time.Hour, 94 // Min and max should both be the "now" time 95 wantMin: "2018-01-01 00:55:00", 96 wantMax: "2018-01-01 00:55:00", 97 }, 98 { 99 name: "1h expired", 100 // This time is after expiry 101 now: "2018-01-01 01:01:01", 102 issued: "2018-01-01 00:00:00", 103 lifetime: 1 * time.Hour, 104 // Min and max should both be the "now" time 105 wantMin: "2018-01-01 01:01:01", 106 wantMax: "2018-01-01 01:01:01", 107 }, 108 { 109 name: "too short lifetime", 110 // This time is after expiry 111 now: "2018-01-01 01:01:01", 112 issued: "2018-01-01 00:00:00", 113 lifetime: 1 * time.Minute, 114 // Min and max should both be the "now" time 115 wantMin: "2018-01-01 01:01:01", 116 wantMax: "2018-01-01 01:01:01", 117 }, 118 } 119 120 for _, tc := range tests { 121 t.Run(tc.name, func(t *testing.T) { 122 require := require.New(t) 123 now, err := time.Parse("2006-01-02 15:04:05", tc.now) 124 require.NoError(err) 125 issued, err := time.Parse("2006-01-02 15:04:05", tc.issued) 126 require.NoError(err) 127 wantMin, err := time.Parse("2006-01-02 15:04:05", tc.wantMin) 128 require.NoError(err) 129 wantMax, err := time.Parse("2006-01-02 15:04:05", tc.wantMax) 130 require.NoError(err) 131 132 min, max := calculateSoftExpiry(now, &structs.IssuedCert{ 133 ValidAfter: issued, 134 ValidBefore: issued.Add(tc.lifetime), 135 }) 136 137 require.Equal(wantMin, min) 138 require.Equal(wantMax, max) 139 }) 140 } 141 } 142 143 // Test that after an initial signing, new CA roots (new ID) will 144 // trigger a blocking query to execute. 145 func TestConnectCALeaf_changingRoots(t *testing.T) { 146 t.Parallel() 147 148 require := require.New(t) 149 rpc := TestRPC(t) 150 defer rpc.AssertExpectations(t) 151 152 typ, rootsCh := testCALeafType(t, rpc) 153 defer close(rootsCh) 154 155 caRoot := connect.TestCA(t, nil) 156 caRoot.Active = true 157 rootsCh <- structs.IndexedCARoots{ 158 ActiveRootID: caRoot.ID, 159 TrustDomain: "fake-trust-domain.consul", 160 Roots: []*structs.CARoot{ 161 caRoot, 162 }, 163 QueryMeta: structs.QueryMeta{Index: 1}, 164 } 165 166 // We need this later but needs to be defined so we sign second CSR with it 167 // otherwise we break the cert root checking. 168 caRoot2 := connect.TestCA(t, nil) 169 170 // Instrument ConnectCA.Sign to return signed cert 171 var resp *structs.IssuedCert 172 var idx uint64 173 174 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil). 175 Run(func(args mock.Arguments) { 176 ca := caRoot 177 cIdx := atomic.AddUint64(&idx, 1) 178 if cIdx > 1 { 179 // Second time round use the new CA 180 ca = caRoot2 181 } 182 reply := args.Get(2).(*structs.IssuedCert) 183 leaf, _ := connect.TestLeaf(t, "web", ca) 184 reply.CertPEM = leaf 185 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 186 reply.ValidBefore = time.Now().Add(11 * time.Hour) 187 reply.CreateIndex = cIdx 188 reply.ModifyIndex = reply.CreateIndex 189 resp = reply 190 }) 191 192 // We'll reuse the fetch options and request 193 opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second} 194 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"} 195 196 // First fetch should return immediately 197 fetchCh := TestFetchCh(t, typ, opts, req) 198 select { 199 case <-time.After(100 * time.Millisecond): 200 t.Fatal("shouldn't block waiting for fetch") 201 case result := <-fetchCh: 202 v := mustFetchResult(t, result) 203 require.Equal(resp, v.Value) 204 require.Equal(uint64(1), v.Index) 205 // Set the LastResult for subsequent fetches 206 opts.LastResult = &v 207 } 208 209 // Second fetch should block with set index 210 opts.MinIndex = 1 211 fetchCh = TestFetchCh(t, typ, opts, req) 212 select { 213 case result := <-fetchCh: 214 t.Fatalf("should not return: %#v", result) 215 case <-time.After(100 * time.Millisecond): 216 } 217 218 // Let's send in new roots, which should trigger the sign req. We need to take 219 // care to set the new root as active 220 caRoot2.Active = true 221 caRoot.Active = false 222 rootsCh <- structs.IndexedCARoots{ 223 ActiveRootID: caRoot2.ID, 224 TrustDomain: "fake-trust-domain.consul", 225 Roots: []*structs.CARoot{ 226 caRoot2, 227 caRoot, 228 }, 229 QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)}, 230 } 231 select { 232 case <-time.After(100 * time.Millisecond): 233 t.Fatal("shouldn't block waiting for fetch") 234 case result := <-fetchCh: 235 v := mustFetchResult(t, result) 236 require.Equal(resp, v.Value) 237 // 3 since the second CA "update" used up 2 238 require.Equal(uint64(3), v.Index) 239 // Set the LastResult for subsequent fetches 240 opts.LastResult = &v 241 opts.MinIndex = 3 242 } 243 244 // Third fetch should block 245 fetchCh = TestFetchCh(t, typ, opts, req) 246 select { 247 case result := <-fetchCh: 248 t.Fatalf("should not return: %#v", result) 249 case <-time.After(100 * time.Millisecond): 250 } 251 } 252 253 // Tests that if the root change jitter is longer than the time left on the 254 // timeout, we return normally but then still renew the cert on a subsequent 255 // call. 256 func TestConnectCALeaf_changingRootsJitterBetweenCalls(t *testing.T) { 257 t.Parallel() 258 259 require := require.New(t) 260 rpc := TestRPC(t) 261 defer rpc.AssertExpectations(t) 262 263 typ, rootsCh := testCALeafType(t, rpc) 264 defer close(rootsCh) 265 266 // Override the root-change delay so we will timeout first. We can't set it to 267 // a crazy high value otherwise we'll have to wait that long in the test to 268 // see if it actually happens on subsequent calls. We instead reduce the 269 // timeout in FetchOptions to be much shorter than this. 270 typ.TestOverrideCAChangeInitialDelay = 100 * time.Millisecond 271 272 caRoot := connect.TestCA(t, nil) 273 caRoot.Active = true 274 rootsCh <- structs.IndexedCARoots{ 275 ActiveRootID: caRoot.ID, 276 TrustDomain: "fake-trust-domain.consul", 277 Roots: []*structs.CARoot{ 278 caRoot, 279 }, 280 QueryMeta: structs.QueryMeta{Index: 1}, 281 } 282 283 // Instrument ConnectCA.Sign to return signed cert 284 var resp *structs.IssuedCert 285 var idx uint64 286 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil). 287 Run(func(args mock.Arguments) { 288 reply := args.Get(2).(*structs.IssuedCert) 289 leaf, _ := connect.TestLeaf(t, "web", caRoot) 290 reply.CertPEM = leaf 291 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 292 reply.ValidBefore = time.Now().Add(11 * time.Hour) 293 reply.CreateIndex = atomic.AddUint64(&idx, 1) 294 reply.ModifyIndex = reply.CreateIndex 295 resp = reply 296 }) 297 298 // We'll reuse the fetch options and request. Timeout must be much shorter 299 // than the initial root delay. 20ms means that if we deliver the root change 300 // during the first blocking call, we should need to block fully for 5 more 301 // calls before the cert is renewed. We pick a timeout that is not an exact 302 // multiple of the 100ms delay above to reduce the chance that timing works 303 // out in a way that makes it hard to tell a timeout from an early return due 304 // to a cert renewal. 305 opts := cache.FetchOptions{MinIndex: 0, Timeout: 35 * time.Millisecond} 306 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"} 307 308 // First fetch should return immediately 309 fetchCh := TestFetchCh(t, typ, opts, req) 310 select { 311 case <-time.After(100 * time.Millisecond): 312 t.Fatal("shouldn't block waiting for fetch") 313 case result := <-fetchCh: 314 v := mustFetchResult(t, result) 315 require.Equal(resp, v.Value) 316 require.Equal(uint64(1), v.Index) 317 // Set the LastResult for subsequent fetches 318 opts.LastResult = &v 319 } 320 321 // Let's send in new roots, which should eventually trigger the sign req. We 322 // need to take care to set the new root as active. Note that this is 323 // implicitly testing that root updates that happen in between leaf blocking 324 // queries are still noticed too. At this point no leaf blocking query is 325 // running so the root watch should be stopped. By pushing this update, the 326 // next blocking query will _immediately_ see the new root which means it 327 // needs to correctly notice that it is not the same one that generated the 328 // current cert and start the rotation. This is good, just not obvious that 329 // the behavior is actually well tested here when it is. 330 caRoot2 := connect.TestCA(t, nil) 331 caRoot2.Active = true 332 caRoot.Active = false 333 rootsCh <- structs.IndexedCARoots{ 334 ActiveRootID: caRoot2.ID, 335 TrustDomain: "fake-trust-domain.consul", 336 Roots: []*structs.CARoot{ 337 caRoot2, 338 caRoot, 339 }, 340 QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)}, 341 } 342 earliestRootDelivery := time.Now() 343 344 // Some number of fetches (2,3,4 likely) should timeout after 20ms and after 345 // 100ms has elapsed total we should see the new cert. Since this is all very 346 // timing dependent, we don't hard code exact numbers here and instead loop 347 // for plenty of time and do as many calls as it takes and just assert on the 348 // time taken and that the call either blocks and returns the cached cert, or 349 // returns the new one. 350 opts.MinIndex = 1 351 var shouldExpireAfter time.Time 352 i := 1 353 rootsDelivered := false 354 for rootsDelivered { 355 start := time.Now() 356 fetchCh = TestFetchCh(t, typ, opts, req) 357 select { 358 case result := <-fetchCh: 359 v := mustFetchResult(t, result) 360 timeTaken := time.Since(start) 361 362 // There are two options, either it blocked waiting for the delay after 363 // the rotation or it returned the new CA cert before the timeout was 364 // done. TO be more robust against timing, we take the value as the 365 // decider for which case it is, and assert timing matches our expected 366 // bounds rather than vice versa. 367 368 if v.Index > uint64(1) { 369 // Got a new cert 370 require.Equal(resp, v.Value) 371 require.Equal(uint64(3), v.Index) 372 // Should not have been delivered before the delay 373 require.True(time.Since(earliestRootDelivery) > typ.TestOverrideCAChangeInitialDelay) 374 // All good. We are done! 375 rootsDelivered = true 376 } else { 377 // Should be the cached cert 378 require.Equal(resp, v.Value) 379 require.Equal(uint64(1), v.Index) 380 // Sanity check we blocked for the whole timeout 381 require.Truef(timeTaken > opts.Timeout, 382 "should block for at least %s, returned after %s", 383 opts.Timeout, timeTaken) 384 // Sanity check that the forceExpireAfter state was set correctly 385 shouldExpireAfter = v.State.(*fetchState).forceExpireAfter 386 require.True(shouldExpireAfter.After(time.Now())) 387 require.True(shouldExpireAfter.Before(time.Now().Add(typ.TestOverrideCAChangeInitialDelay))) 388 } 389 // Set the LastResult for subsequent fetches 390 opts.LastResult = &v 391 case <-time.After(50 * time.Millisecond): 392 t.Fatalf("request %d blocked too long", i) 393 } 394 i++ 395 396 // Sanity check that we've not gone way beyond the deadline without a 397 // new cert. We give some leeway to make it less brittle. 398 require.Falsef( 399 time.Now().After(shouldExpireAfter.Add(100*time.Millisecond)), 400 "waited extra 100ms and delayed CA rotate renew didn't happen") 401 } 402 } 403 404 // Tests that if the root changes in between blocking calls we still pick it up. 405 func TestConnectCALeaf_changingRootsBetweenBlockingCalls(t *testing.T) { 406 t.Parallel() 407 408 require := require.New(t) 409 rpc := TestRPC(t) 410 defer rpc.AssertExpectations(t) 411 412 typ, rootsCh := testCALeafType(t, rpc) 413 defer close(rootsCh) 414 415 caRoot := connect.TestCA(t, nil) 416 caRoot.Active = true 417 rootsCh <- structs.IndexedCARoots{ 418 ActiveRootID: caRoot.ID, 419 TrustDomain: "fake-trust-domain.consul", 420 Roots: []*structs.CARoot{ 421 caRoot, 422 }, 423 QueryMeta: structs.QueryMeta{Index: 1}, 424 } 425 426 // Instrument ConnectCA.Sign to return signed cert 427 var resp *structs.IssuedCert 428 var idx uint64 429 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil). 430 Run(func(args mock.Arguments) { 431 reply := args.Get(2).(*structs.IssuedCert) 432 leaf, _ := connect.TestLeaf(t, "web", caRoot) 433 reply.CertPEM = leaf 434 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 435 reply.ValidBefore = time.Now().Add(11 * time.Hour) 436 reply.CreateIndex = atomic.AddUint64(&idx, 1) 437 reply.ModifyIndex = reply.CreateIndex 438 resp = reply 439 }) 440 441 // We'll reuse the fetch options and request. Short timeout important since we 442 // wait the full timeout before chaning roots. 443 opts := cache.FetchOptions{MinIndex: 0, Timeout: 35 * time.Millisecond} 444 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"} 445 446 // First fetch should return immediately 447 fetchCh := TestFetchCh(t, typ, opts, req) 448 select { 449 case <-time.After(100 * time.Millisecond): 450 t.Fatal("shouldn't block waiting for fetch") 451 case result := <-fetchCh: 452 v := mustFetchResult(t, result) 453 require.Equal(resp, v.Value) 454 require.Equal(uint64(1), v.Index) 455 // Set the LastResult for subsequent fetches 456 opts.LastResult = &v 457 } 458 459 // Next fetch should block for the full timeout 460 start := time.Now() 461 fetchCh = TestFetchCh(t, typ, opts, req) 462 select { 463 case <-time.After(100 * time.Millisecond): 464 t.Fatal("shouldn't block for too long waiting for fetch") 465 case result := <-fetchCh: 466 v := mustFetchResult(t, result) 467 require.Equal(resp, v.Value) 468 // Still the initial cached result 469 require.Equal(uint64(1), v.Index) 470 // Sanity check that it waited 471 require.True(time.Since(start) > opts.Timeout) 472 // Set the LastResult for subsequent fetches 473 opts.LastResult = &v 474 } 475 476 // No active requests, simulate root change now 477 caRoot2 := connect.TestCA(t, nil) 478 caRoot2.Active = true 479 caRoot.Active = false 480 rootsCh <- structs.IndexedCARoots{ 481 ActiveRootID: caRoot2.ID, 482 TrustDomain: "fake-trust-domain.consul", 483 Roots: []*structs.CARoot{ 484 caRoot2, 485 caRoot, 486 }, 487 QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)}, 488 } 489 earliestRootDelivery := time.Now() 490 491 // We should get the new cert immediately on next fetch (since test override 492 // root change jitter to be 1 nanosecond so no delay expected). 493 fetchCh = TestFetchCh(t, typ, opts, req) 494 select { 495 case <-time.After(100 * time.Millisecond): 496 t.Fatal("shouldn't block too long waiting for fetch") 497 case result := <-fetchCh: 498 v := mustFetchResult(t, result) 499 require.Equal(resp, v.Value) 500 // Index should be 3 since root change consumed 2 501 require.Equal(uint64(3), v.Index) 502 // Sanity check that we didn't wait too long 503 require.True(time.Since(earliestRootDelivery) < opts.Timeout) 504 // Set the LastResult for subsequent fetches 505 opts.LastResult = &v 506 } 507 508 } 509 510 func TestConnectCALeaf_CSRRateLimiting(t *testing.T) { 511 t.Parallel() 512 513 require := require.New(t) 514 rpc := TestRPC(t) 515 defer rpc.AssertExpectations(t) 516 517 typ, rootsCh := testCALeafType(t, rpc) 518 defer close(rootsCh) 519 520 // Each jitter window will be only 100 ms long to make testing quick but 521 // highly likely not to fail based on scheduling issues. 522 typ.TestOverrideCAChangeInitialDelay = 100 * time.Millisecond 523 524 // Setup root that will be returned by the mocked Root cache fetch 525 caRoot := connect.TestCA(t, nil) 526 caRoot.Active = true 527 rootsCh <- structs.IndexedCARoots{ 528 ActiveRootID: caRoot.ID, 529 TrustDomain: "fake-trust-domain.consul", 530 Roots: []*structs.CARoot{ 531 caRoot, 532 }, 533 QueryMeta: structs.QueryMeta{Index: 1}, 534 } 535 536 // Instrument ConnectCA.Sign 537 var resp *structs.IssuedCert 538 var idx, rateLimitedRPCs uint64 539 540 genCert := func(args mock.Arguments) { 541 reply := args.Get(2).(*structs.IssuedCert) 542 leaf, _ := connect.TestLeaf(t, "web", caRoot) 543 reply.CertPEM = leaf 544 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 545 reply.ValidBefore = time.Now().Add(11 * time.Hour) 546 reply.CreateIndex = atomic.AddUint64(&idx, 1) 547 reply.ModifyIndex = reply.CreateIndex 548 resp = reply 549 } 550 551 incRateLimit := func(args mock.Arguments) { 552 atomic.AddUint64(&rateLimitedRPCs, 1) 553 } 554 555 // First call return rate limit error. This is important as it checks 556 // behavior when cache is empty and we have to return a nil Value but need to 557 // save state to do the right thing for retry. 558 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything). 559 Return(consul.ErrRateLimited).Once().Run(incRateLimit) 560 // Then succeed on second call 561 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything). 562 Return(nil).Run(genCert).Once() 563 // Then be rate limited again on several further calls 564 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything). 565 Return(consul.ErrRateLimited).Twice().Run(incRateLimit) 566 // Then fine after that 567 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything). 568 Return(nil).Run(genCert) 569 570 opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute} 571 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"} 572 573 // First fetch should return rate limit error directly - client is expected to 574 // backoff itself. 575 fetchCh := TestFetchCh(t, typ, opts, req) 576 select { 577 case <-time.After(200 * time.Millisecond): 578 t.Fatal("shouldn't block longer than one jitter window for success") 579 case result := <-fetchCh: 580 switch v := result.(type) { 581 case error: 582 require.Error(v) 583 require.Equal(consul.ErrRateLimited.Error(), v.Error()) 584 case cache.FetchResult: 585 t.Fatalf("Expected error") 586 } 587 } 588 589 // Second call should return correct cert immediately. 590 fetchCh = TestFetchCh(t, typ, opts, req) 591 select { 592 case <-time.After(100 * time.Millisecond): 593 t.Fatal("shouldn't block waiting for fetch") 594 case result := <-fetchCh: 595 v := mustFetchResult(t, result) 596 require.Equal(resp, v.Value) 597 require.Equal(uint64(1), v.Index) 598 // Set the LastResult for subsequent fetches 599 opts.LastResult = &v 600 // Set MinIndex 601 opts.MinIndex = 1 602 } 603 604 // Send in new roots, which should trigger the next sign req. We need to take 605 // care to set the new root as active 606 caRoot2 := connect.TestCA(t, nil) 607 caRoot2.Active = true 608 caRoot.Active = false 609 rootsCh <- structs.IndexedCARoots{ 610 ActiveRootID: caRoot2.ID, 611 TrustDomain: "fake-trust-domain.consul", 612 Roots: []*structs.CARoot{ 613 caRoot2, 614 caRoot, 615 }, 616 QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)}, 617 } 618 earliestRootDelivery := time.Now() 619 620 // Sanity check state 621 require.Equal(uint64(1), atomic.LoadUint64(&rateLimitedRPCs)) 622 623 // After root rotation jitter has been waited out, a new CSR will 624 // be attempted but will fail and return the previous cached result with no 625 // error since we will try again soon. 626 fetchCh = TestFetchCh(t, typ, opts, req) 627 select { 628 case <-time.After(200 * time.Millisecond): 629 t.Fatal("shouldn't block too long waiting for fetch") 630 case result := <-fetchCh: 631 // We should block for _at least_ one jitter period since we set that to 632 // 100ms and in test override mode we always pick the max jitter not a 633 // random amount. 634 require.True(time.Since(earliestRootDelivery) > 100*time.Millisecond) 635 require.Equal(uint64(2), atomic.LoadUint64(&rateLimitedRPCs)) 636 637 v := mustFetchResult(t, result) 638 require.Equal(resp, v.Value) 639 // 1 since this should still be the original cached result as we failed to 640 // get a new cert. 641 require.Equal(uint64(1), v.Index) 642 // Set the LastResult for subsequent fetches 643 opts.LastResult = &v 644 } 645 646 // Root rotation state is now only captured in the opts.LastResult.State so a 647 // subsequent call should also wait for 100ms and then attempt to generate a 648 // new cert since we failed last time. 649 fetchCh = TestFetchCh(t, typ, opts, req) 650 select { 651 case <-time.After(200 * time.Millisecond): 652 t.Fatal("shouldn't block too long waiting for fetch") 653 case result := <-fetchCh: 654 // We should block for _at least_ two jitter periods now. 655 require.True(time.Since(earliestRootDelivery) > 200*time.Millisecond) 656 require.Equal(uint64(3), atomic.LoadUint64(&rateLimitedRPCs)) 657 658 v := mustFetchResult(t, result) 659 require.Equal(resp, v.Value) 660 // 1 since this should still be the original cached result as we failed to 661 // get a new cert. 662 require.Equal(uint64(1), v.Index) 663 // Set the LastResult for subsequent fetches 664 opts.LastResult = &v 665 } 666 667 // Now we've had two rate limit failures and seen root rotation state work 668 // across both the blocking request that observed the rotation and the 669 // subsequent one. The next request should wait out the rest of the backoff 670 // and then actually fetch a new cert at last! 671 fetchCh = TestFetchCh(t, typ, opts, req) 672 select { 673 case <-time.After(200 * time.Millisecond): 674 t.Fatal("shouldn't block too long waiting for fetch") 675 case result := <-fetchCh: 676 // We should block for _at least_ three jitter periods now. 677 require.True(time.Since(earliestRootDelivery) > 300*time.Millisecond) 678 require.Equal(uint64(3), atomic.LoadUint64(&rateLimitedRPCs)) 679 680 v := mustFetchResult(t, result) 681 require.Equal(resp, v.Value) 682 // 3 since the rootCA change used 2 683 require.Equal(uint64(3), v.Index) 684 // Set the LastResult for subsequent fetches 685 opts.LastResult = &v 686 } 687 } 688 689 // This test runs multiple concurrent callers watching different leaf certs and 690 // tries to ensure that the background root watch activity behaves correctly. 691 func TestConnectCALeaf_watchRootsDedupingMultipleCallers(t *testing.T) { 692 t.Parallel() 693 694 require := require.New(t) 695 rpc := TestRPC(t) 696 defer rpc.AssertExpectations(t) 697 698 typ, rootsCh := testCALeafType(t, rpc) 699 defer close(rootsCh) 700 701 caRoot := connect.TestCA(t, nil) 702 caRoot.Active = true 703 rootsCh <- structs.IndexedCARoots{ 704 ActiveRootID: caRoot.ID, 705 TrustDomain: "fake-trust-domain.consul", 706 Roots: []*structs.CARoot{ 707 caRoot, 708 }, 709 QueryMeta: structs.QueryMeta{Index: 1}, 710 } 711 712 // Instrument ConnectCA.Sign to return signed cert 713 var idx uint64 714 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil). 715 Run(func(args mock.Arguments) { 716 reply := args.Get(2).(*structs.IssuedCert) 717 // Note we will sign certs for same service name each time because 718 // otherwise we have to re-invent whole CSR endpoint here to be able to 719 // control things - parse PEM sign with right key etc. It doesn't matter - 720 // we use the CreateIndex to differentiate the "right" results. 721 leaf, _ := connect.TestLeaf(t, "web", caRoot) 722 reply.CertPEM = leaf 723 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 724 reply.ValidBefore = time.Now().Add(11 * time.Hour) 725 reply.CreateIndex = atomic.AddUint64(&idx, 1) 726 reply.ModifyIndex = reply.CreateIndex 727 }) 728 729 // n is the number of clients we'll run 730 n := 3 731 732 // setup/testDoneCh are used for coordinating clients such that each has 733 // initial cert delivered and is blocking before the root changes. It's not a 734 // wait group since we want to be able to timeout the main test goroutine if 735 // one of the clients gets stuck. Instead it's a buffered chan. 736 setupDoneCh := make(chan struct{}, n) 737 testDoneCh := make(chan struct{}, n) 738 // rootsUpdate is used to coordinate clients so they know when they should 739 // expect to see leaf renewed after root change. 740 rootsUpdatedCh := make(chan struct{}) 741 742 // Create a function that models a single client. It should go through the 743 // steps of getting an initial cert and then watching for changes until root 744 // updates. 745 client := func(i int) { 746 // We'll reuse the fetch options and request 747 opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second} 748 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: fmt.Sprintf("web-%d", i)} 749 750 // First fetch should return immediately 751 fetchCh := TestFetchCh(t, typ, opts, req) 752 select { 753 case <-time.After(100 * time.Millisecond): 754 t.Fatal("shouldn't block waiting for fetch") 755 case result := <-fetchCh: 756 v := mustFetchResult(t, result) 757 opts.LastResult = &v 758 } 759 760 // Second fetch should block with set index 761 opts.MinIndex = 1 762 fetchCh = TestFetchCh(t, typ, opts, req) 763 select { 764 case result := <-fetchCh: 765 t.Fatalf("should not return: %#v", result) 766 case <-time.After(100 * time.Millisecond): 767 } 768 769 // We're done with setup and the blocking call is still blocking in 770 // background. 771 setupDoneCh <- struct{}{} 772 773 // Wait until all others are also done and roots change incase there are 774 // stragglers delaying the root update. 775 select { 776 case <-rootsUpdatedCh: 777 case <-time.After(200 * time.Millisecond): 778 t.Fatalf("waited too long for root update") 779 } 780 781 // Now we should see root update within a short period 782 select { 783 case <-time.After(100 * time.Millisecond): 784 t.Fatal("shouldn't block waiting for fetch") 785 case result := <-fetchCh: 786 v := mustFetchResult(t, result) 787 // Index must be different 788 require.NotEqual(opts.MinIndex, v.Value.(*structs.IssuedCert).CreateIndex) 789 } 790 791 testDoneCh <- struct{}{} 792 } 793 794 // Sanity check the roots watcher is not running yet 795 assertRootsWatchCounts(t, typ, 0, 0) 796 797 for i := 0; i < n; i++ { 798 go client(i) 799 } 800 801 timeoutCh := time.After(200 * time.Millisecond) 802 803 for i := 0; i < n; i++ { 804 select { 805 case <-timeoutCh: 806 t.Fatal("timed out waiting for clients") 807 case <-setupDoneCh: 808 } 809 } 810 811 // Should be 3 clients running now, so the roots watcher should have started 812 // once and not stopped. 813 assertRootsWatchCounts(t, typ, 1, 0) 814 815 // Now we deliver the root update 816 caRoot2 := connect.TestCA(t, nil) 817 caRoot2.Active = true 818 caRoot.Active = false 819 rootsCh <- structs.IndexedCARoots{ 820 ActiveRootID: caRoot2.ID, 821 TrustDomain: "fake-trust-domain.consul", 822 Roots: []*structs.CARoot{ 823 caRoot2, 824 caRoot, 825 }, 826 QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)}, 827 } 828 // And notify clients 829 close(rootsUpdatedCh) 830 831 timeoutCh = time.After(200 * time.Millisecond) 832 for i := 0; i < n; i++ { 833 select { 834 case <-timeoutCh: 835 t.Fatalf("timed out waiting for %d of %d clients to renew after root change", n-i, n) 836 case <-testDoneCh: 837 } 838 } 839 840 // All active requests have returned the new cert so the rootsWatcher should 841 // have stopped. This is timing dependent though so retry a few times 842 retry.RunWith(retry.ThreeTimes(), t, func(r *retry.R) { 843 assertRootsWatchCounts(r, typ, 1, 1) 844 }) 845 } 846 847 func assertRootsWatchCounts(t require.TestingT, typ *ConnectCALeaf, wantStarts, wantStops int) { 848 if tt, ok := t.(*testing.T); ok { 849 tt.Helper() 850 } 851 starts := atomic.LoadUint32(&typ.testRootWatchStartCount) 852 stops := atomic.LoadUint32(&typ.testRootWatchStopCount) 853 require.Equal(t, wantStarts, int(starts)) 854 require.Equal(t, wantStops, int(stops)) 855 } 856 857 func mustFetchResult(t *testing.T, result interface{}) cache.FetchResult { 858 t.Helper() 859 switch v := result.(type) { 860 case error: 861 require.NoError(t, v) 862 case cache.FetchResult: 863 return v 864 default: 865 t.Fatalf("unexpected type from fetch %T", v) 866 } 867 return cache.FetchResult{} 868 } 869 870 // Test that after an initial signing, an expiringLeaf will trigger a 871 // blocking query to resign. 872 func TestConnectCALeaf_expiringLeaf(t *testing.T) { 873 t.Parallel() 874 875 require := require.New(t) 876 rpc := TestRPC(t) 877 defer rpc.AssertExpectations(t) 878 879 typ, rootsCh := testCALeafType(t, rpc) 880 defer close(rootsCh) 881 882 caRoot := connect.TestCA(t, nil) 883 caRoot.Active = true 884 rootsCh <- structs.IndexedCARoots{ 885 ActiveRootID: caRoot.ID, 886 TrustDomain: "fake-trust-domain.consul", 887 Roots: []*structs.CARoot{ 888 caRoot, 889 }, 890 QueryMeta: structs.QueryMeta{Index: 1}, 891 } 892 893 // Instrument ConnectCA.Sign to 894 var resp *structs.IssuedCert 895 var idx uint64 896 rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil). 897 Run(func(args mock.Arguments) { 898 reply := args.Get(2).(*structs.IssuedCert) 899 reply.CreateIndex = atomic.AddUint64(&idx, 1) 900 reply.ModifyIndex = reply.CreateIndex 901 902 leaf, _ := connect.TestLeaf(t, "web", caRoot) 903 reply.CertPEM = leaf 904 905 if reply.CreateIndex == 1 { 906 // First call returns expired cert to prime cache with an expired one. 907 reply.ValidAfter = time.Now().Add(-13 * time.Hour) 908 reply.ValidBefore = time.Now().Add(-1 * time.Hour) 909 } else { 910 reply.ValidAfter = time.Now().Add(-1 * time.Hour) 911 reply.ValidBefore = time.Now().Add(11 * time.Hour) 912 } 913 914 resp = reply 915 }) 916 917 // We'll reuse the fetch options and request 918 opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second} 919 req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"} 920 921 // First fetch should return immediately 922 fetchCh := TestFetchCh(t, typ, opts, req) 923 select { 924 case <-time.After(100 * time.Millisecond): 925 t.Fatal("shouldn't block waiting for fetch") 926 case result := <-fetchCh: 927 switch v := result.(type) { 928 case error: 929 require.NoError(v) 930 case cache.FetchResult: 931 require.Equal(resp, v.Value) 932 require.Equal(uint64(1), v.Index) 933 // Set the LastResult for subsequent fetches 934 opts.LastResult = &v 935 } 936 } 937 938 // Second fetch should return immediately despite there being 939 // no updated CA roots, because we issued an expired cert. 940 fetchCh = TestFetchCh(t, typ, opts, req) 941 select { 942 case <-time.After(100 * time.Millisecond): 943 t.Fatal("shouldn't block waiting for fetch") 944 case result := <-fetchCh: 945 switch v := result.(type) { 946 case error: 947 require.NoError(v) 948 case cache.FetchResult: 949 require.Equal(resp, v.Value) 950 require.Equal(uint64(2), v.Index) 951 // Set the LastResult for subsequent fetches 952 opts.LastResult = &v 953 } 954 } 955 956 // Third fetch should block since the cert is not expiring and 957 // we also didn't update CA certs. 958 opts.MinIndex = 2 959 fetchCh = TestFetchCh(t, typ, opts, req) 960 select { 961 case result := <-fetchCh: 962 t.Fatalf("should not return: %#v", result) 963 case <-time.After(100 * time.Millisecond): 964 } 965 } 966 967 // testCALeafType returns a *ConnectCALeaf that is pre-configured to 968 // use the given RPC implementation for "ConnectCA.Sign" operations. 969 func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) { 970 // This creates an RPC implementation that will block until the 971 // value is sent on the channel. This lets us control when the 972 // next values show up. 973 rootsCh := make(chan structs.IndexedCARoots, 10) 974 rootsRPC := &testGatedRootsRPC{ValueCh: rootsCh} 975 976 // Create a cache 977 c := cache.TestCache(t) 978 c.RegisterType(ConnectCARootName, &ConnectCARoot{RPC: rootsRPC}, &cache.RegisterOptions{ 979 // Disable refresh so that the gated channel controls the 980 // request directly. Otherwise, we get background refreshes and 981 // it screws up the ordering of the channel reads of the 982 // testGatedRootsRPC implementation. 983 Refresh: false, 984 }) 985 986 // Create the leaf type 987 return &ConnectCALeaf{ 988 RPC: rpc, 989 Cache: c, 990 Datacenter: "dc1", 991 // Override the root-change spread so we don't have to wait up to 20 seconds 992 // to see root changes work. Can be changed back for specific tests that 993 // need to test this, Note it's not 0 since that used default but is 994 // effectively the same. 995 TestOverrideCAChangeInitialDelay: 1 * time.Microsecond, 996 }, rootsCh 997 } 998 999 // testGatedRootsRPC will send each subsequent value on the channel as the 1000 // RPC response, blocking if it is waiting for a value on the channel. This 1001 // can be used to control when background fetches are returned and what they 1002 // return. 1003 // 1004 // This should be used with Refresh = false for the registration options so 1005 // automatic refreshes don't mess up the channel read ordering. 1006 type testGatedRootsRPC struct { 1007 ValueCh chan structs.IndexedCARoots 1008 } 1009 1010 func (r *testGatedRootsRPC) RPC(method string, args interface{}, reply interface{}) error { 1011 if method != "ConnectCA.Roots" { 1012 return fmt.Errorf("invalid RPC method: %s", method) 1013 } 1014 1015 replyReal := reply.(*structs.IndexedCARoots) 1016 *replyReal = <-r.ValueCh 1017 return nil 1018 }