github.com/DerekStrickland/consul@v1.4.5/agent/cache-types/connect_ca_leaf_test.go (about)

     1  package cachetype
     2  
     3  import (
     4  	"fmt"
     5  	"sync/atomic"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/hashicorp/consul/testutil/retry"
    10  
    11  	"github.com/hashicorp/consul/agent/cache"
    12  	"github.com/hashicorp/consul/agent/connect"
    13  	"github.com/hashicorp/consul/agent/consul"
    14  	"github.com/hashicorp/consul/agent/structs"
    15  	"github.com/stretchr/testify/mock"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
    19  func TestCalculateSoftExpire(t *testing.T) {
    20  	tests := []struct {
    21  		name     string
    22  		now      string
    23  		issued   string
    24  		lifetime time.Duration
    25  		wantMin  string
    26  		wantMax  string
    27  	}{
    28  		{
    29  			name:     "72h just issued",
    30  			now:      "2018-01-01 00:00:01",
    31  			issued:   "2018-01-01 00:00:00",
    32  			lifetime: 72 * time.Hour,
    33  			// Should jitter between 60% and 90% of the lifetime, which is 43.2/64.8
    34  			// hours after issued
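        			// Worked arithmetic: 72h * 0.6 = 43.2h and 72h * 0.9 = 64.8h, i.e.
        			// 19:12:00 on Jan 2 and 16:48:00 on Jan 3.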
    35  			wantMin: "2018-01-02 19:12:00",
    36  			wantMax: "2018-01-03 16:48:00",
    37  		},
    38  		{
    39  			name: "72h in renew range",
    40  			// This time should be inside the renewal range.
    41  			now:      "2018-01-02 20:00:20",
    42  			issued:   "2018-01-01 00:00:00",
    43  			lifetime: 72 * time.Hour,
    44  			// Min should be the "now" time
    45  			wantMin: "2018-01-02 20:00:20",
    46  			wantMax: "2018-01-03 16:48:00",
    47  		},
    48  		{
    49  			name: "72h in hard renew",
    50  			// This time is past the end of the soft renewal window (hard renewal).
    51  			now:      "2018-01-03 18:00:00",
    52  			issued:   "2018-01-01 00:00:00",
    53  			lifetime: 72 * time.Hour,
    54  			// Min and max should both be the "now" time
    55  			wantMin: "2018-01-03 18:00:00",
    56  			wantMax: "2018-01-03 18:00:00",
    57  		},
    58  		{
    59  			name: "72h expired",
    60  			// This time is after expiry
    61  			now:      "2018-01-05 00:00:00",
    62  			issued:   "2018-01-01 00:00:00",
    63  			lifetime: 72 * time.Hour,
    64  			// Min and max should both be the "now" time
    65  			wantMin: "2018-01-05 00:00:00",
    66  			wantMax: "2018-01-05 00:00:00",
    67  		},
    68  		{
    69  			name:     "1h just issued",
    70  			now:      "2018-01-01 00:00:01",
    71  			issued:   "2018-01-01 00:00:00",
    72  			lifetime: 1 * time.Hour,
    73  			// Should jitter between 60% and 90% of the lifetime, which is 36/54
    74  			// minutes after issued
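        			// Worked arithmetic: 60m * 0.6 = 36m and 60m * 0.9 = 54m, i.e.
        			// 00:36:00 and 00:54:00 on the issue day.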
    75  			wantMin: "2018-01-01 00:36:00",
    76  			wantMax: "2018-01-01 00:54:00",
    77  		},
    78  		{
    79  			name: "1h in renew range",
    80  			// This time should be inside the renewal range.
    81  			now:      "2018-01-01 00:40:00",
    82  			issued:   "2018-01-01 00:00:00",
    83  			lifetime: 1 * time.Hour,
    84  			// Min should be the "now" time
    85  			wantMin: "2018-01-01 00:40:00",
    86  			wantMax: "2018-01-01 00:54:00",
    87  		},
    88  		{
    89  			name: "1h in hard renew",
    90  			// This time is past the end of the soft renewal window (hard renewal).
    91  			now:      "2018-01-01 00:55:00",
    92  			issued:   "2018-01-01 00:00:00",
    93  			lifetime: 1 * time.Hour,
    94  			// Min and max should both be the "now" time
    95  			wantMin: "2018-01-01 00:55:00",
    96  			wantMax: "2018-01-01 00:55:00",
    97  		},
    98  		{
    99  			name: "1h expired",
   100  			// This time is after expiry
   101  			now:      "2018-01-01 01:01:01",
   102  			issued:   "2018-01-01 00:00:00",
   103  			lifetime: 1 * time.Hour,
   104  			// Min and max should both be the "now" time
   105  			wantMin: "2018-01-01 01:01:01",
   106  			wantMax: "2018-01-01 01:01:01",
   107  		},
   108  		{
   109  			name: "too short lifetime",
   110  			// This time is after expiry
   111  			now:      "2018-01-01 01:01:01",
   112  			issued:   "2018-01-01 00:00:00",
   113  			lifetime: 1 * time.Minute,
   114  			// Min and max should both be the "now" time
   115  			wantMin: "2018-01-01 01:01:01",
   116  			wantMax: "2018-01-01 01:01:01",
   117  		},
   118  	}
   119  
   120  	for _, tc := range tests {
   121  		t.Run(tc.name, func(t *testing.T) {
   122  			require := require.New(t)
   123  			now, err := time.Parse("2006-01-02 15:04:05", tc.now)
   124  			require.NoError(err)
   125  			issued, err := time.Parse("2006-01-02 15:04:05", tc.issued)
   126  			require.NoError(err)
   127  			wantMin, err := time.Parse("2006-01-02 15:04:05", tc.wantMin)
   128  			require.NoError(err)
   129  			wantMax, err := time.Parse("2006-01-02 15:04:05", tc.wantMax)
   130  			require.NoError(err)
   131  
   132  			min, max := calculateSoftExpiry(now, &structs.IssuedCert{
   133  				ValidAfter:  issued,
   134  				ValidBefore: issued.Add(tc.lifetime),
   135  			})
   136  
   137  			require.Equal(wantMin, min)
   138  			require.Equal(wantMax, max)
   139  		})
   140  	}
   141  }
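        
        // The table above pins down the semantics calculateSoftExpiry must have. As a
        // minimal sketch (an illustration implied by the cases, not necessarily the
        // actual implementation):
        //
        //	lifetime := cert.ValidBefore.Sub(cert.ValidAfter)
        //	min := cert.ValidAfter.Add(time.Duration(0.6 * float64(lifetime)))
        //	max := cert.ValidAfter.Add(time.Duration(0.9 * float64(lifetime)))
        //	if max.Before(now) {
        //		return now, now // past the window (or lifetime too short): renew now
        //	}
        //	if min.Before(now) {
        //		min = now // inside the window: clamp the lower bound to now
        //	}
        //	return min, max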
   142  
   143  // Test that after an initial signing, new CA roots (new ID) will
   144  // trigger a blocking query to execute.
   145  func TestConnectCALeaf_changingRoots(t *testing.T) {
   146  	t.Parallel()
   147  
   148  	require := require.New(t)
   149  	rpc := TestRPC(t)
   150  	defer rpc.AssertExpectations(t)
   151  
   152  	typ, rootsCh := testCALeafType(t, rpc)
   153  	defer close(rootsCh)
   154  
   155  	caRoot := connect.TestCA(t, nil)
   156  	caRoot.Active = true
   157  	rootsCh <- structs.IndexedCARoots{
   158  		ActiveRootID: caRoot.ID,
   159  		TrustDomain:  "fake-trust-domain.consul",
   160  		Roots: []*structs.CARoot{
   161  			caRoot,
   162  		},
   163  		QueryMeta: structs.QueryMeta{Index: 1},
   164  	}
   165  
   166  	// We need this later, but it must be defined now so we sign the second CSR
   167  	// with it; otherwise we break the cert root checking.
   168  	caRoot2 := connect.TestCA(t, nil)
   169  
   170  	// Instrument ConnectCA.Sign to return signed cert
   171  	var resp *structs.IssuedCert
   172  	var idx uint64
   173  
   174  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
   175  		Run(func(args mock.Arguments) {
   176  			ca := caRoot
   177  			cIdx := atomic.AddUint64(&idx, 1)
   178  			if cIdx > 1 {
   179  				// Second time round use the new CA
   180  				ca = caRoot2
   181  			}
   182  			reply := args.Get(2).(*structs.IssuedCert)
   183  			leaf, _ := connect.TestLeaf(t, "web", ca)
   184  			reply.CertPEM = leaf
   185  			reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   186  			reply.ValidBefore = time.Now().Add(11 * time.Hour)
   187  			reply.CreateIndex = cIdx
   188  			reply.ModifyIndex = reply.CreateIndex
   189  			resp = reply
   190  		})
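        
        	// Note on the mock wiring above: testify's Run callback receives the raw
        	// call arguments, so args.Get(2) is the reply pointer passed as RPC's third
        	// argument. Mutating it in place is how the mock "returns" the signed cert,
        	// while Return(nil) only supplies the error result.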
   191  
   192  	// We'll reuse the fetch options and request
   193  	opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second}
   194  	req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"}
   195  
   196  	// First fetch should return immediately
   197  	fetchCh := TestFetchCh(t, typ, opts, req)
   198  	select {
   199  	case <-time.After(100 * time.Millisecond):
   200  		t.Fatal("shouldn't block waiting for fetch")
   201  	case result := <-fetchCh:
   202  		v := mustFetchResult(t, result)
   203  		require.Equal(resp, v.Value)
   204  		require.Equal(uint64(1), v.Index)
   205  		// Set the LastResult for subsequent fetches
   206  		opts.LastResult = &v
   207  	}
   208  
   209  	// Second fetch should block with set index
   210  	opts.MinIndex = 1
   211  	fetchCh = TestFetchCh(t, typ, opts, req)
   212  	select {
   213  	case result := <-fetchCh:
   214  		t.Fatalf("should not return: %#v", result)
   215  	case <-time.After(100 * time.Millisecond):
   216  	}
   217  
   218  	// Let's send in new roots, which should trigger the sign req. We need to take
   219  	// care to set the new root as active
   220  	caRoot2.Active = true
   221  	caRoot.Active = false
   222  	rootsCh <- structs.IndexedCARoots{
   223  		ActiveRootID: caRoot2.ID,
   224  		TrustDomain:  "fake-trust-domain.consul",
   225  		Roots: []*structs.CARoot{
   226  			caRoot2,
   227  			caRoot,
   228  		},
   229  		QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)},
   230  	}
   231  	select {
   232  	case <-time.After(100 * time.Millisecond):
   233  		t.Fatal("shouldn't block waiting for fetch")
   234  	case result := <-fetchCh:
   235  		v := mustFetchResult(t, result)
   236  		require.Equal(resp, v.Value)
   237  		// 3 since the second CA "update" used up 2
   238  		require.Equal(uint64(3), v.Index)
   239  		// Set the LastResult for subsequent fetches
   240  		opts.LastResult = &v
   241  		opts.MinIndex = 3
   242  	}
   243  
   244  	// Third fetch should block
   245  	fetchCh = TestFetchCh(t, typ, opts, req)
   246  	select {
   247  	case result := <-fetchCh:
   248  		t.Fatalf("should not return: %#v", result)
   249  	case <-time.After(100 * time.Millisecond):
   250  	}
   251  }
   252  
   253  // Tests that if the root change jitter is longer than the time left on the
   254  // timeout, we return normally but then still renew the cert on a subsequent
   255  // call.
   256  func TestConnectCALeaf_changingRootsJitterBetweenCalls(t *testing.T) {
   257  	t.Parallel()
   258  
   259  	require := require.New(t)
   260  	rpc := TestRPC(t)
   261  	defer rpc.AssertExpectations(t)
   262  
   263  	typ, rootsCh := testCALeafType(t, rpc)
   264  	defer close(rootsCh)
   265  
   266  	// Override the root-change delay so we will time out first. We can't set it
   267  	// to a crazy high value, otherwise we'll have to wait that long in the test
   268  	// to see if it actually happens on subsequent calls. We instead reduce the
   269  	// timeout in FetchOptions to be much shorter than this.
   270  	typ.TestOverrideCAChangeInitialDelay = 100 * time.Millisecond
   271  
   272  	caRoot := connect.TestCA(t, nil)
   273  	caRoot.Active = true
   274  	rootsCh <- structs.IndexedCARoots{
   275  		ActiveRootID: caRoot.ID,
   276  		TrustDomain:  "fake-trust-domain.consul",
   277  		Roots: []*structs.CARoot{
   278  			caRoot,
   279  		},
   280  		QueryMeta: structs.QueryMeta{Index: 1},
   281  	}
   282  
   283  	// Instrument ConnectCA.Sign to return signed cert
   284  	var resp *structs.IssuedCert
   285  	var idx uint64
   286  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
   287  		Run(func(args mock.Arguments) {
   288  			reply := args.Get(2).(*structs.IssuedCert)
   289  			leaf, _ := connect.TestLeaf(t, "web", caRoot)
   290  			reply.CertPEM = leaf
   291  			reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   292  			reply.ValidBefore = time.Now().Add(11 * time.Hour)
   293  			reply.CreateIndex = atomic.AddUint64(&idx, 1)
   294  			reply.ModifyIndex = reply.CreateIndex
   295  			resp = reply
   296  		})
   297  
   298  	// We'll reuse the fetch options and request. Timeout must be much shorter
   299  	// than the initial root delay. 35ms means that if we deliver the root change
   300  	// during the first blocking call, we should need to block fully for a few
   301  	// more calls before the cert is renewed. We pick a timeout that is not an
   302  	// exact multiple of the 100ms delay above to reduce the chance that timing
   303  	// works out in a way that makes it hard to tell a timeout from an early
   304  	// return due to a cert renewal.
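        	// Back-of-envelope: with a 100ms root-change delay and a 35ms timeout,
        	// roughly three fully blocked calls elapse before the renewal fires.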
   305  	opts := cache.FetchOptions{MinIndex: 0, Timeout: 35 * time.Millisecond}
   306  	req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"}
   307  
   308  	// First fetch should return immediately
   309  	fetchCh := TestFetchCh(t, typ, opts, req)
   310  	select {
   311  	case <-time.After(100 * time.Millisecond):
   312  		t.Fatal("shouldn't block waiting for fetch")
   313  	case result := <-fetchCh:
   314  		v := mustFetchResult(t, result)
   315  		require.Equal(resp, v.Value)
   316  		require.Equal(uint64(1), v.Index)
   317  		// Set the LastResult for subsequent fetches
   318  		opts.LastResult = &v
   319  	}
   320  
   321  	// Let's send in new roots, which should eventually trigger the sign req. We
   322  	// need to take care to set the new root as active. Note that this is
   323  	// implicitly testing that root updates that happen in between leaf blocking
   324  	// queries are still noticed too. At this point no leaf blocking query is
   325  	// running so the root watch should be stopped. By pushing this update, the
   326  	// next blocking query will _immediately_ see the new root which means it
   327  	// needs to correctly notice that it is not the same one that generated the
   328  	// current cert and start the rotation. This is good, just not obvious that
   329  	// the behavior is actually well tested here when it is.
   330  	caRoot2 := connect.TestCA(t, nil)
   331  	caRoot2.Active = true
   332  	caRoot.Active = false
   333  	rootsCh <- structs.IndexedCARoots{
   334  		ActiveRootID: caRoot2.ID,
   335  		TrustDomain:  "fake-trust-domain.consul",
   336  		Roots: []*structs.CARoot{
   337  			caRoot2,
   338  			caRoot,
   339  		},
   340  		QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)},
   341  	}
   342  	earliestRootDelivery := time.Now()
   343  
   344  	// Some number of fetches (2,3,4 likely) should timeout after 35ms and after
   345  	// 100ms has elapsed total we should see the new cert. Since this is all very
   346  	// timing dependent, we don't hard code exact numbers here and instead loop
   347  	// for plenty of time and do as many calls as it takes and just assert on the
   348  	// time taken and that the call either blocks and returns the cached cert, or
   349  	// returns the new one.
   350  	opts.MinIndex = 1
   351  	var shouldExpireAfter time.Time
   352  	i := 1
   353  	rootsDelivered := false
   354  	for !rootsDelivered {
   355  		start := time.Now()
   356  		fetchCh = TestFetchCh(t, typ, opts, req)
   357  		select {
   358  		case result := <-fetchCh:
   359  			v := mustFetchResult(t, result)
   360  			timeTaken := time.Since(start)
   361  
   362  			// There are two possibilities: either it blocked waiting for the delay
   363  			// after the rotation, or it returned the new CA cert before the timeout
   364  			// was done. To be more robust against timing, we take the value as the
   365  			// decider for which case it is, and assert timing matches our expected
   366  			// bounds rather than vice versa.
   367  
   368  			if v.Index > uint64(1) {
   369  				// Got a new cert
   370  				require.Equal(resp, v.Value)
   371  				require.Equal(uint64(3), v.Index)
   372  				// Should not have been delivered before the delay
   373  				require.True(time.Since(earliestRootDelivery) > typ.TestOverrideCAChangeInitialDelay)
   374  				// All good. We are done!
   375  				rootsDelivered = true
   376  			} else {
   377  				// Should be the cached cert
   378  				require.Equal(resp, v.Value)
   379  				require.Equal(uint64(1), v.Index)
   380  				// Sanity check we blocked for the whole timeout
   381  				require.Truef(timeTaken > opts.Timeout,
   382  					"should block for at least %s, returned after %s",
   383  					opts.Timeout, timeTaken)
   384  				// Sanity check that the forceExpireAfter state was set correctly
   385  				shouldExpireAfter = v.State.(*fetchState).forceExpireAfter
   386  				require.True(shouldExpireAfter.After(time.Now()))
   387  				require.True(shouldExpireAfter.Before(time.Now().Add(typ.TestOverrideCAChangeInitialDelay)))
   388  			}
   389  			// Set the LastResult for subsequent fetches
   390  			opts.LastResult = &v
   391  		case <-time.After(50 * time.Millisecond):
   392  			t.Fatalf("request %d blocked too long", i)
   393  		}
   394  		i++
   395  
   396  		// Sanity check that we've not gone way beyond the deadline without a
   397  		// new cert. We give some leeway to make it less brittle.
   398  		require.Falsef(
   399  			time.Now().After(shouldExpireAfter.Add(100*time.Millisecond)),
   400  			"waited extra 100ms and delayed CA rotate renew didn't happen")
   401  	}
   402  }
   403  
   404  // Tests that if the root changes in between blocking calls we still pick it up.
   405  func TestConnectCALeaf_changingRootsBetweenBlockingCalls(t *testing.T) {
   406  	t.Parallel()
   407  
   408  	require := require.New(t)
   409  	rpc := TestRPC(t)
   410  	defer rpc.AssertExpectations(t)
   411  
   412  	typ, rootsCh := testCALeafType(t, rpc)
   413  	defer close(rootsCh)
   414  
   415  	caRoot := connect.TestCA(t, nil)
   416  	caRoot.Active = true
   417  	rootsCh <- structs.IndexedCARoots{
   418  		ActiveRootID: caRoot.ID,
   419  		TrustDomain:  "fake-trust-domain.consul",
   420  		Roots: []*structs.CARoot{
   421  			caRoot,
   422  		},
   423  		QueryMeta: structs.QueryMeta{Index: 1},
   424  	}
   425  
   426  	// Instrument ConnectCA.Sign to return signed cert
   427  	var resp *structs.IssuedCert
   428  	var idx uint64
   429  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
   430  		Run(func(args mock.Arguments) {
   431  			reply := args.Get(2).(*structs.IssuedCert)
   432  			leaf, _ := connect.TestLeaf(t, "web", caRoot)
   433  			reply.CertPEM = leaf
   434  			reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   435  			reply.ValidBefore = time.Now().Add(11 * time.Hour)
   436  			reply.CreateIndex = atomic.AddUint64(&idx, 1)
   437  			reply.ModifyIndex = reply.CreateIndex
   438  			resp = reply
   439  		})
   440  
   441  	// We'll reuse the fetch options and request. A short timeout is important
   442  	// since we wait the full timeout before changing roots.
   443  	opts := cache.FetchOptions{MinIndex: 0, Timeout: 35 * time.Millisecond}
   444  	req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"}
   445  
   446  	// First fetch should return immediately
   447  	fetchCh := TestFetchCh(t, typ, opts, req)
   448  	select {
   449  	case <-time.After(100 * time.Millisecond):
   450  		t.Fatal("shouldn't block waiting for fetch")
   451  	case result := <-fetchCh:
   452  		v := mustFetchResult(t, result)
   453  		require.Equal(resp, v.Value)
   454  		require.Equal(uint64(1), v.Index)
   455  		// Set the LastResult for subsequent fetches
   456  		opts.LastResult = &v
   457  	}
   458  
   459  	// Next fetch should block for the full timeout
   460  	start := time.Now()
   461  	fetchCh = TestFetchCh(t, typ, opts, req)
   462  	select {
   463  	case <-time.After(100 * time.Millisecond):
   464  		t.Fatal("shouldn't block for too long waiting for fetch")
   465  	case result := <-fetchCh:
   466  		v := mustFetchResult(t, result)
   467  		require.Equal(resp, v.Value)
   468  		// Still the initial cached result
   469  		require.Equal(uint64(1), v.Index)
   470  		// Sanity check that it waited
   471  		require.True(time.Since(start) > opts.Timeout)
   472  		// Set the LastResult for subsequent fetches
   473  		opts.LastResult = &v
   474  	}
   475  
   476  	// No active requests, simulate root change now
   477  	caRoot2 := connect.TestCA(t, nil)
   478  	caRoot2.Active = true
   479  	caRoot.Active = false
   480  	rootsCh <- structs.IndexedCARoots{
   481  		ActiveRootID: caRoot2.ID,
   482  		TrustDomain:  "fake-trust-domain.consul",
   483  		Roots: []*structs.CARoot{
   484  			caRoot2,
   485  			caRoot,
   486  		},
   487  		QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)},
   488  	}
   489  	earliestRootDelivery := time.Now()
   490  
   491  	// We should get the new cert immediately on the next fetch (the test
   492  	// overrides the root change delay to 1 microsecond, so no delay is expected).
   493  	fetchCh = TestFetchCh(t, typ, opts, req)
   494  	select {
   495  	case <-time.After(100 * time.Millisecond):
   496  		t.Fatal("shouldn't block too long waiting for fetch")
   497  	case result := <-fetchCh:
   498  		v := mustFetchResult(t, result)
   499  		require.Equal(resp, v.Value)
   500  		// Index should be 3 since root change consumed 2
   501  		require.Equal(uint64(3), v.Index)
   502  		// Sanity check that we didn't wait too long
   503  		require.True(time.Since(earliestRootDelivery) < opts.Timeout)
   504  		// Set the LastResult for subsequent fetches
   505  		opts.LastResult = &v
   506  	}
   507  
   508  }
   509  
   510  func TestConnectCALeaf_CSRRateLimiting(t *testing.T) {
   511  	t.Parallel()
   512  
   513  	require := require.New(t)
   514  	rpc := TestRPC(t)
   515  	defer rpc.AssertExpectations(t)
   516  
   517  	typ, rootsCh := testCALeafType(t, rpc)
   518  	defer close(rootsCh)
   519  
   520  	// Each jitter window will be only 100 ms long to keep the test quick while
   521  	// remaining highly unlikely to fail due to scheduling issues.
   522  	typ.TestOverrideCAChangeInitialDelay = 100 * time.Millisecond
   523  
   524  	// Setup root that will be returned by the mocked Root cache fetch
   525  	caRoot := connect.TestCA(t, nil)
   526  	caRoot.Active = true
   527  	rootsCh <- structs.IndexedCARoots{
   528  		ActiveRootID: caRoot.ID,
   529  		TrustDomain:  "fake-trust-domain.consul",
   530  		Roots: []*structs.CARoot{
   531  			caRoot,
   532  		},
   533  		QueryMeta: structs.QueryMeta{Index: 1},
   534  	}
   535  
   536  	// Instrument ConnectCA.Sign
   537  	var resp *structs.IssuedCert
   538  	var idx, rateLimitedRPCs uint64
   539  
   540  	genCert := func(args mock.Arguments) {
   541  		reply := args.Get(2).(*structs.IssuedCert)
   542  		leaf, _ := connect.TestLeaf(t, "web", caRoot)
   543  		reply.CertPEM = leaf
   544  		reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   545  		reply.ValidBefore = time.Now().Add(11 * time.Hour)
   546  		reply.CreateIndex = atomic.AddUint64(&idx, 1)
   547  		reply.ModifyIndex = reply.CreateIndex
   548  		resp = reply
   549  	}
   550  
   551  	incRateLimit := func(args mock.Arguments) {
   552  		atomic.AddUint64(&rateLimitedRPCs, 1)
   553  	}
   554  
   555  	// First call returns a rate limit error. This is important as it checks
   556  	// behavior when cache is empty and we have to return a nil Value but need to
   557  	// save state to do the right thing for retry.
   558  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).
   559  		Return(consul.ErrRateLimited).Once().Run(incRateLimit)
   560  	// Then succeed on second call
   561  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).
   562  		Return(nil).Run(genCert).Once()
   563  	// Then be rate limited again on the next two calls
   564  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).
   565  		Return(consul.ErrRateLimited).Twice().Run(incRateLimit)
   566  	// Then fine after that
   567  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).
   568  		Return(nil).Run(genCert)
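        
        	// Net effect of the mock setup above: call 1 -> rate limited, call 2 ->
        	// success (CreateIndex 1), calls 3-4 -> rate limited, call 5+ -> success.
        	// The fetches below walk through exactly this sequence.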
   569  
   570  	opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
   571  	req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"}
   572  
   573  	// First fetch should return rate limit error directly - client is expected to
   574  	// backoff itself.
   575  	fetchCh := TestFetchCh(t, typ, opts, req)
   576  	select {
   577  	case <-time.After(200 * time.Millisecond):
   578  		t.Fatal("shouldn't block longer than one jitter window for success")
   579  	case result := <-fetchCh:
   580  		switch v := result.(type) {
   581  		case error:
   582  			require.Error(v)
   583  			require.Equal(consul.ErrRateLimited.Error(), v.Error())
   584  		case cache.FetchResult:
   585  			t.Fatalf("Expected error")
   586  		}
   587  	}
   588  
   589  	// Second call should return correct cert immediately.
   590  	fetchCh = TestFetchCh(t, typ, opts, req)
   591  	select {
   592  	case <-time.After(100 * time.Millisecond):
   593  		t.Fatal("shouldn't block waiting for fetch")
   594  	case result := <-fetchCh:
   595  		v := mustFetchResult(t, result)
   596  		require.Equal(resp, v.Value)
   597  		require.Equal(uint64(1), v.Index)
   598  		// Set the LastResult for subsequent fetches
   599  		opts.LastResult = &v
   600  		// Set MinIndex
   601  		opts.MinIndex = 1
   602  	}
   603  
   604  	// Send in new roots, which should trigger the next sign req. We need to take
   605  	// care to set the new root as active
   606  	caRoot2 := connect.TestCA(t, nil)
   607  	caRoot2.Active = true
   608  	caRoot.Active = false
   609  	rootsCh <- structs.IndexedCARoots{
   610  		ActiveRootID: caRoot2.ID,
   611  		TrustDomain:  "fake-trust-domain.consul",
   612  		Roots: []*structs.CARoot{
   613  			caRoot2,
   614  			caRoot,
   615  		},
   616  		QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)},
   617  	}
   618  	earliestRootDelivery := time.Now()
   619  
   620  	// Sanity check state
   621  	require.Equal(uint64(1), atomic.LoadUint64(&rateLimitedRPCs))
   622  
   623  	// After root rotation jitter has been waited out, a new CSR will
   624  	// be attempted but will fail and return the previous cached result with no
   625  	// error since we will try again soon.
   626  	fetchCh = TestFetchCh(t, typ, opts, req)
   627  	select {
   628  	case <-time.After(200 * time.Millisecond):
   629  		t.Fatal("shouldn't block too long waiting for fetch")
   630  	case result := <-fetchCh:
   631  		// We should block for _at least_ one jitter period since we set that to
   632  		// 100ms and in test override mode we always pick the max jitter not a
   633  		// random amount.
   634  		require.True(time.Since(earliestRootDelivery) > 100*time.Millisecond)
   635  		require.Equal(uint64(2), atomic.LoadUint64(&rateLimitedRPCs))
   636  
   637  		v := mustFetchResult(t, result)
   638  		require.Equal(resp, v.Value)
   639  		// 1 since this should still be the original cached result as we failed to
   640  		// get a new cert.
   641  		require.Equal(uint64(1), v.Index)
   642  		// Set the LastResult for subsequent fetches
   643  		opts.LastResult = &v
   644  	}
   645  
   646  	// Root rotation state is now only captured in the opts.LastResult.State so a
   647  	// subsequent call should also wait for 100ms and then attempt to generate a
   648  	// new cert since we failed last time.
   649  	fetchCh = TestFetchCh(t, typ, opts, req)
   650  	select {
   651  	case <-time.After(200 * time.Millisecond):
   652  		t.Fatal("shouldn't block too long waiting for fetch")
   653  	case result := <-fetchCh:
   654  		// We should block for _at least_ two jitter periods now.
   655  		require.True(time.Since(earliestRootDelivery) > 200*time.Millisecond)
   656  		require.Equal(uint64(3), atomic.LoadUint64(&rateLimitedRPCs))
   657  
   658  		v := mustFetchResult(t, result)
   659  		require.Equal(resp, v.Value)
   660  		// 1 since this should still be the original cached result as we failed to
   661  		// get a new cert.
   662  		require.Equal(uint64(1), v.Index)
   663  		// Set the LastResult for subsequent fetches
   664  		opts.LastResult = &v
   665  	}
   666  
   667  	// Now we've had two rate limit failures and seen root rotation state work
   668  	// across both the blocking request that observed the rotation and the
   669  	// subsequent one. The next request should wait out the rest of the backoff
   670  	// and then actually fetch a new cert at last!
   671  	fetchCh = TestFetchCh(t, typ, opts, req)
   672  	select {
   673  	case <-time.After(200 * time.Millisecond):
   674  		t.Fatal("shouldn't block too long waiting for fetch")
   675  	case result := <-fetchCh:
   676  		// We should block for _at least_ three jitter periods now.
   677  		require.True(time.Since(earliestRootDelivery) > 300*time.Millisecond)
   678  		require.Equal(uint64(3), atomic.LoadUint64(&rateLimitedRPCs))
   679  
   680  		v := mustFetchResult(t, result)
   681  		require.Equal(resp, v.Value)
   682  		// 3 since the rootCA change used 2
   683  		require.Equal(uint64(3), v.Index)
   684  		// Set the LastResult for subsequent fetches
   685  		opts.LastResult = &v
   686  	}
   687  }
   688  
   689  // This test runs multiple concurrent callers watching different leaf certs and
   690  // tries to ensure that the background root watch activity behaves correctly.
   691  func TestConnectCALeaf_watchRootsDedupingMultipleCallers(t *testing.T) {
   692  	t.Parallel()
   693  
   694  	require := require.New(t)
   695  	rpc := TestRPC(t)
   696  	defer rpc.AssertExpectations(t)
   697  
   698  	typ, rootsCh := testCALeafType(t, rpc)
   699  	defer close(rootsCh)
   700  
   701  	caRoot := connect.TestCA(t, nil)
   702  	caRoot.Active = true
   703  	rootsCh <- structs.IndexedCARoots{
   704  		ActiveRootID: caRoot.ID,
   705  		TrustDomain:  "fake-trust-domain.consul",
   706  		Roots: []*structs.CARoot{
   707  			caRoot,
   708  		},
   709  		QueryMeta: structs.QueryMeta{Index: 1},
   710  	}
   711  
   712  	// Instrument ConnectCA.Sign to return signed cert
   713  	var idx uint64
   714  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
   715  		Run(func(args mock.Arguments) {
   716  			reply := args.Get(2).(*structs.IssuedCert)
   717  			// Note we sign certs for the same service name each time because
   718  			// otherwise we'd have to re-invent the whole CSR endpoint here (parse
   719  			// the PEM, sign with the right key, etc.) to control things. It doesn't
   720  			// matter; we use the CreateIndex to differentiate the "right" results.
   721  			leaf, _ := connect.TestLeaf(t, "web", caRoot)
   722  			reply.CertPEM = leaf
   723  			reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   724  			reply.ValidBefore = time.Now().Add(11 * time.Hour)
   725  			reply.CreateIndex = atomic.AddUint64(&idx, 1)
   726  			reply.ModifyIndex = reply.CreateIndex
   727  		})
   728  
   729  	// n is the number of clients we'll run
   730  	n := 3
   731  
   732  	// setupDoneCh/testDoneCh are used for coordinating clients such that each
   733  	// has its initial cert delivered and is blocking before the root changes.
   734  	// It's not a wait group since we want to be able to time out the main test
   735  	// goroutine if one of the clients gets stuck; instead it's a buffered chan.
   736  	setupDoneCh := make(chan struct{}, n)
   737  	testDoneCh := make(chan struct{}, n)
   738  	// rootsUpdatedCh is used to coordinate clients so they know when they
   739  	// should expect to see the leaf renewed after the root change.
   740  	rootsUpdatedCh := make(chan struct{})
   741  
   742  	// Create a function that models a single client. It should go through the
   743  	// steps of getting an initial cert and then watching for changes until root
   744  	// updates.
   745  	client := func(i int) {
   746  		// We'll reuse the fetch options and request
   747  		opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second}
   748  		req := &ConnectCALeafRequest{Datacenter: "dc1", Service: fmt.Sprintf("web-%d", i)}
   749  
   750  		// First fetch should return immediately
   751  		fetchCh := TestFetchCh(t, typ, opts, req)
   752  		select {
   753  		case <-time.After(100 * time.Millisecond):
   754  			t.Fatal("shouldn't block waiting for fetch")
   755  		case result := <-fetchCh:
   756  			v := mustFetchResult(t, result)
   757  			opts.LastResult = &v
   758  		}
   759  
   760  		// Second fetch should block with set index
   761  		opts.MinIndex = 1
   762  		fetchCh = TestFetchCh(t, typ, opts, req)
   763  		select {
   764  		case result := <-fetchCh:
   765  			t.Fatalf("should not return: %#v", result)
   766  		case <-time.After(100 * time.Millisecond):
   767  		}
   768  
   769  		// We're done with setup and the blocking call is still blocking in
   770  		// background.
   771  		setupDoneCh <- struct{}{}
   772  
   773  		// Wait until all the others are also done and the roots change, in case
   774  		// there are stragglers delaying the root update.
   775  		select {
   776  		case <-rootsUpdatedCh:
   777  		case <-time.After(200 * time.Millisecond):
   778  			t.Fatalf("waited too long for root update")
   779  		}
   780  
   781  		// Now we should see root update within a short period
   782  		select {
   783  		case <-time.After(100 * time.Millisecond):
   784  			t.Fatal("shouldn't block waiting for fetch")
   785  		case result := <-fetchCh:
   786  			v := mustFetchResult(t, result)
   787  			// Index must be different
   788  			require.NotEqual(opts.MinIndex, v.Value.(*structs.IssuedCert).CreateIndex)
   789  		}
   790  
   791  		testDoneCh <- struct{}{}
   792  	}
   793  
   794  	// Sanity check the roots watcher is not running yet
   795  	assertRootsWatchCounts(t, typ, 0, 0)
   796  
   797  	for i := 0; i < n; i++ {
   798  		go client(i)
   799  	}
   800  
   801  	timeoutCh := time.After(200 * time.Millisecond)
   802  
   803  	for i := 0; i < n; i++ {
   804  		select {
   805  		case <-timeoutCh:
   806  			t.Fatal("timed out waiting for clients")
   807  		case <-setupDoneCh:
   808  		}
   809  	}
   810  
   811  	// Should be 3 clients running now, so the roots watcher should have started
   812  	// once and not stopped.
   813  	assertRootsWatchCounts(t, typ, 1, 0)
   814  
   815  	// Now we deliver the root update
   816  	caRoot2 := connect.TestCA(t, nil)
   817  	caRoot2.Active = true
   818  	caRoot.Active = false
   819  	rootsCh <- structs.IndexedCARoots{
   820  		ActiveRootID: caRoot2.ID,
   821  		TrustDomain:  "fake-trust-domain.consul",
   822  		Roots: []*structs.CARoot{
   823  			caRoot2,
   824  			caRoot,
   825  		},
   826  		QueryMeta: structs.QueryMeta{Index: atomic.AddUint64(&idx, 1)},
   827  	}
   828  	// And notify clients
   829  	close(rootsUpdatedCh)
   830  
   831  	timeoutCh = time.After(200 * time.Millisecond)
   832  	for i := 0; i < n; i++ {
   833  		select {
   834  		case <-timeoutCh:
   835  			t.Fatalf("timed out waiting for %d of %d clients to renew after root change", n-i, n)
   836  		case <-testDoneCh:
   837  		}
   838  	}
   839  
   840  	// All active requests have returned the new cert so the rootsWatcher should
   841  	// have stopped. This is timing dependent though so retry a few times
   842  	retry.RunWith(retry.ThreeTimes(), t, func(r *retry.R) {
   843  		assertRootsWatchCounts(r, typ, 1, 1)
   844  	})
   845  }
   846  
   847  func assertRootsWatchCounts(t require.TestingT, typ *ConnectCALeaf, wantStarts, wantStops int) {
   848  	if tt, ok := t.(*testing.T); ok {
   849  		tt.Helper()
   850  	}
   851  	starts := atomic.LoadUint32(&typ.testRootWatchStartCount)
   852  	stops := atomic.LoadUint32(&typ.testRootWatchStopCount)
   853  	require.Equal(t, wantStarts, int(starts))
   854  	require.Equal(t, wantStops, int(stops))
   855  }
   856  
   857  func mustFetchResult(t *testing.T, result interface{}) cache.FetchResult {
   858  	t.Helper()
   859  	switch v := result.(type) {
   860  	case error:
   861  		require.NoError(t, v)
   862  	case cache.FetchResult:
   863  		return v
   864  	default:
   865  		t.Fatalf("unexpected type from fetch %T", v)
   866  	}
   867  	return cache.FetchResult{}
   868  }
   869  
   870  // Test that after an initial signing, an expiringLeaf will trigger a
   871  // blocking query to resign.
   872  func TestConnectCALeaf_expiringLeaf(t *testing.T) {
   873  	t.Parallel()
   874  
   875  	require := require.New(t)
   876  	rpc := TestRPC(t)
   877  	defer rpc.AssertExpectations(t)
   878  
   879  	typ, rootsCh := testCALeafType(t, rpc)
   880  	defer close(rootsCh)
   881  
   882  	caRoot := connect.TestCA(t, nil)
   883  	caRoot.Active = true
   884  	rootsCh <- structs.IndexedCARoots{
   885  		ActiveRootID: caRoot.ID,
   886  		TrustDomain:  "fake-trust-domain.consul",
   887  		Roots: []*structs.CARoot{
   888  			caRoot,
   889  		},
   890  		QueryMeta: structs.QueryMeta{Index: 1},
   891  	}
   892  
   893  	// Instrument ConnectCA.Sign to return a signed cert
   894  	var resp *structs.IssuedCert
   895  	var idx uint64
   896  	rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
   897  		Run(func(args mock.Arguments) {
   898  			reply := args.Get(2).(*structs.IssuedCert)
   899  			reply.CreateIndex = atomic.AddUint64(&idx, 1)
   900  			reply.ModifyIndex = reply.CreateIndex
   901  
   902  			leaf, _ := connect.TestLeaf(t, "web", caRoot)
   903  			reply.CertPEM = leaf
   904  
   905  			if reply.CreateIndex == 1 {
   906  				// First call returns expired cert to prime cache with an expired one.
   907  				reply.ValidAfter = time.Now().Add(-13 * time.Hour)
   908  				reply.ValidBefore = time.Now().Add(-1 * time.Hour)
   909  			} else {
   910  				reply.ValidAfter = time.Now().Add(-1 * time.Hour)
   911  				reply.ValidBefore = time.Now().Add(11 * time.Hour)
   912  			}
   913  
   914  			resp = reply
   915  		})
   916  
   917  	// We'll reuse the fetch options and request
   918  	opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Second}
   919  	req := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web"}
   920  
   921  	// First fetch should return immediately
   922  	fetchCh := TestFetchCh(t, typ, opts, req)
   923  	select {
   924  	case <-time.After(100 * time.Millisecond):
   925  		t.Fatal("shouldn't block waiting for fetch")
   926  	case result := <-fetchCh:
   927  		switch v := result.(type) {
   928  		case error:
   929  			require.NoError(v)
   930  		case cache.FetchResult:
   931  			require.Equal(resp, v.Value)
   932  			require.Equal(uint64(1), v.Index)
   933  			// Set the LastResult for subsequent fetches
   934  			opts.LastResult = &v
   935  		}
   936  	}
   937  
   938  	// Second fetch should return immediately despite there being
   939  	// no updated CA roots, because we issued an expired cert.
   940  	fetchCh = TestFetchCh(t, typ, opts, req)
   941  	select {
   942  	case <-time.After(100 * time.Millisecond):
   943  		t.Fatal("shouldn't block waiting for fetch")
   944  	case result := <-fetchCh:
   945  		switch v := result.(type) {
   946  		case error:
   947  			require.NoError(v)
   948  		case cache.FetchResult:
   949  			require.Equal(resp, v.Value)
   950  			require.Equal(uint64(2), v.Index)
   951  			// Set the LastResult for subsequent fetches
   952  			opts.LastResult = &v
   953  		}
   954  	}
   955  
   956  	// Third fetch should block since the cert is not expiring and
   957  	// we also didn't update CA certs.
   958  	opts.MinIndex = 2
   959  	fetchCh = TestFetchCh(t, typ, opts, req)
   960  	select {
   961  	case result := <-fetchCh:
   962  		t.Fatalf("should not return: %#v", result)
   963  	case <-time.After(100 * time.Millisecond):
   964  	}
   965  }
   966  
   967  // testCALeafType returns a *ConnectCALeaf that is pre-configured to
   968  // use the given RPC implementation for "ConnectCA.Sign" operations.
   969  func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) {
   970  	// This creates an RPC implementation that will block until the
   971  	// value is sent on the channel. This lets us control when the
   972  	// next values show up.
   973  	rootsCh := make(chan structs.IndexedCARoots, 10)
   974  	rootsRPC := &testGatedRootsRPC{ValueCh: rootsCh}
   975  
   976  	// Create a cache
   977  	c := cache.TestCache(t)
   978  	c.RegisterType(ConnectCARootName, &ConnectCARoot{RPC: rootsRPC}, &cache.RegisterOptions{
   979  		// Disable refresh so that the gated channel controls the
   980  		// request directly. Otherwise, we get background refreshes and
   981  		// it screws up the ordering of the channel reads of the
   982  		// testGatedRootsRPC implementation.
   983  		Refresh: false,
   984  	})
   985  
   986  	// Create the leaf type
   987  	return &ConnectCALeaf{
   988  		RPC:        rpc,
   989  		Cache:      c,
   990  		Datacenter: "dc1",
   991  		// Override the root-change spread so we don't have to wait up to 20 seconds
   992  		// to see root changes work. It can be changed back for specific tests that
   993  		// need to test this. Note it's not 0 since that would use the default, but
   994  		// it's effectively the same.
   995  		TestOverrideCAChangeInitialDelay: 1 * time.Microsecond,
   996  	}, rootsCh
   997  }
   998  
   999  // testGatedRootsRPC will send each subsequent value on the channel as the
  1000  // RPC response, blocking if it is waiting for a value on the channel. This
  1001  // can be used to control when background fetches are returned and what they
  1002  // return.
  1003  //
  1004  // This should be used with Refresh = false for the registration options so
  1005  // automatic refreshes don't mess up the channel read ordering.
  1006  type testGatedRootsRPC struct {
  1007  	ValueCh chan structs.IndexedCARoots
  1008  }
  1009  
  1010  func (r *testGatedRootsRPC) RPC(method string, args interface{}, reply interface{}) error {
  1011  	if method != "ConnectCA.Roots" {
  1012  		return fmt.Errorf("invalid RPC method: %s", method)
  1013  	}
  1014  
  1015  	replyReal := reply.(*structs.IndexedCARoots)
  1016  	*replyReal = <-r.ValueCh
  1017  	return nil
  1018  }
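        
        // In the tests above, each value sent on rootsCh is consumed by exactly one
        // ConnectCA.Roots fetch, which is how the tests control precisely when the
        // root watch observes a CA change.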