github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_closed_timestamp_test.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
	"context"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/require"
)

// TestClosedTimestampWorksWhenRequestsAreSentToNonLeaseHolders ensures that
// errant closed timestamp requests sent to non-leaseholder nodes do not
// prevent future closed timestamps from being created if that node later
// becomes the leaseholder. See #48553 for more details.
func TestClosedTimestampWorksWhenRequestsAreSentToNonLeaseHolders(t *testing.T) {
	defer leaktest.AfterTest(t)()

	ctx := context.Background()
	// Set an incredibly long raft election timeout so we don't risk node
	// liveness failures and subsequent unexpected lease transfers under extreme
	// stress.
	serverArgs := base.TestServerArgs{
		RaftConfig: base.RaftConfig{RaftElectionTimeoutTicks: 1000},
	}
	tc := testcluster.StartTestCluster(t, 2, base.TestClusterArgs{
		ServerArgs:      serverArgs,
		ReplicationMode: base.ReplicationManual,
	})
	defer tc.Stopper().Stop(ctx)

	// The plan: give node 3 a high liveness epoch, make it the leaseholder of
	// the scratch range, and have it request an MLAI from node 1. Then make
	// node 1 the leaseholder and ensure that it can still close timestamps.
	db1 := tc.Server(0).DB()
	sqlRunner := sqlutils.MakeSQLRunner(tc.ServerConn(0))

	// Set a very short closed timestamp target duration so that we don't need to
	// wait long for the closed timestamp machinery to propagate information.
	const closeInterval = 10 * time.Millisecond
	sqlRunner.Exec(t, "SET CLUSTER SETTING kv.closed_timestamp.target_duration = '"+
		closeInterval.String()+"'")

	// To give node 3 a large epoch, synthesize a liveness record with epoch 1000
	// for it before starting the node.
	require.NoError(t, db1.Put(ctx, keys.NodeLivenessKey(3),
		&kvserverpb.Liveness{
			NodeID:     3,
			Epoch:      1000,
			Expiration: hlc.LegacyTimestamp{WallTime: 1},
		}))
	tc.AddServer(t, serverArgs)

	// Create our scratch range and up-replicate it.
	k := tc.ScratchRange(t)
	_, err := tc.AddReplicas(k, tc.Target(1), tc.Target(2))
	require.NoError(t, err)
	require.NoError(t, tc.WaitForVoters(k, tc.Target(1), tc.Target(2)))

	// Wrap transferring the lease in a retry loop to deal with errors due to the
	// initial node liveness for n3. We could alternatively wait for n3 to be
	// live, but that felt like more work at the time and this works.
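	// (The liveness record synthesized for n3 above is already expired, so until
	// n3 heartbeats its own record the transfer below can fail; SucceedsSoon
	// simply retries until it goes through.)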
	transferLease := func(desc *roachpb.RangeDescriptor, target roachpb.ReplicationTarget) {
		testutils.SucceedsSoon(t, func() error {
			return tc.TransferRangeLease(*desc, target)
		})
	}

	// transferLeaseAndWaitForClosed transfers the lease to the specified
	// serverIdx. It ensures that the lease transfer happens, then calls
	// afterLease, and then waits until the closed timestamp moves forward a few
	// intervals.
	transferLeaseAndWaitForClosed := func(serverIdx int, afterLease func()) {
		_, repl := getFirstStoreReplica(t, tc.Server(serverIdx), k)
		target := tc.Target(serverIdx)
		transferLease(repl.Desc(), target)
		testutils.SucceedsSoon(t, func() error {
			if !repl.OwnsValidLease(db1.Clock().Now()) {
				return errors.Errorf("don't yet have the lease")
			}
			return nil
		})
		if afterLease != nil {
			afterLease()
		}
		nowClosed, ok := repl.MaxClosed(ctx)
		require.True(t, ok)
		lease, _ := repl.GetLease()
		if lease.Replica.NodeID != target.NodeID {
			t.Fatalf("lease was unexpectedly transferred away which should" +
				" not happen given the very long timeouts")
		}
		const closedMultiple = 5
		targetClosed := nowClosed.Add(closedMultiple*closeInterval.Nanoseconds(), 0)
		testutils.SucceedsSoon(t, func() error {
			curLease, _ := repl.GetLease()
			if !lease.Equivalent(curLease) {
				t.Fatalf("lease was unexpectedly transferred away which should" +
					" not happen given the very long timeouts")
			}
			closed, ok := repl.MaxClosed(ctx)
			require.True(t, ok)
			if closed.Less(targetClosed) {
				return errors.Errorf("closed timestamp %v not yet after target %v", closed, targetClosed)
			}
			return nil
		})
	}

	// Our new server should have a liveness epoch of 1000.
	s3, repl3 := getFirstStoreReplica(t, tc.Server(2), k)
	transferLeaseAndWaitForClosed(2, func() {
		s3.RequestClosedTimestamp(1, repl3.RangeID)
	})

	// At this point we expect there's a high chance that the request made its
	// way to n1. Now we're going to transfer the lease to n1 and make sure that
	// the closed timestamp still advances.
	transferLeaseAndWaitForClosed(0, nil)
}
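
// getFirstStoreReplica is used above but defined elsewhere in this test
// package. As a rough, non-authoritative sketch of what such a helper can look
// like: it grabs the server's first store and retries until that store has a
// replica for the key. The name below is hypothetical to avoid clashing with
// the real helper, and it would additionally need the
// "github.com/cockroachdb/cockroach/pkg/kv/kvserver" and
// "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" imports.
func getFirstStoreReplicaSketch(
	t *testing.T, s serverutils.TestServerInterface, key roachpb.Key,
) (*kvserver.Store, *kvserver.Replica) {
	// Look up the server's first store.
	store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID())
	require.NoError(t, err)
	// Retry until the store has a replica for the key; replication may still be
	// catching up when the caller asks.
	var repl *kvserver.Replica
	testutils.SucceedsSoon(t, func() error {
		repl = store.LookupReplica(roachpb.RKey(key))
		if repl == nil {
			return errors.New("no replica found for key")
		}
		return nil
	})
	return store, repl
}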