github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/cmd_lease_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package batcheval 12 13 import ( 14 "context" 15 "testing" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/base" 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 21 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 22 "github.com/cockroachdb/cockroach/pkg/util/log" 23 "github.com/stretchr/testify/require" 24 ) 25 26 // TestLeaseTransferWithPipelinedWrite verifies that pipelined writes 27 // do not cause retry errors to be leaked to clients when the error 28 // can be handled internally. Pipelining dissociates a write from its 29 // caller, so the retries of internally-generated errors (specifically 30 // out-of-order lease indexes) must be retried below that level. 31 // 32 // This issue was observed in practice to affect the first insert 33 // after table creation with high probability. 34 func TestLeaseTransferWithPipelinedWrite(t *testing.T) { 35 defer leaktest.AfterTest(t)() 36 37 ctx := context.Background() 38 39 tc := serverutils.StartTestCluster(t, 3, base.TestClusterArgs{}) 40 defer tc.Stopper().Stop(ctx) 41 42 db := tc.ServerConn(0) 43 44 // More than 30 iterations is flaky under stressrace on teamcity. 45 for iter := 0; iter < 30; iter++ { 46 log.Infof(ctx, "iter %d", iter) 47 if _, err := db.ExecContext(ctx, "drop table if exists test"); err != nil { 48 t.Fatal(err) 49 } 50 if _, err := db.ExecContext(ctx, "create table test (a int, b int, primary key (a, b))"); err != nil { 51 t.Fatal(err) 52 } 53 54 workerErrCh := make(chan error, 1) 55 go func() { 56 workerErrCh <- func() error { 57 for i := 0; i < 1; i++ { 58 tx, err := db.BeginTx(ctx, nil) 59 if err != nil { 60 return err 61 } 62 defer func() { 63 if tx != nil { 64 if err := tx.Rollback(); err != nil { 65 log.Warningf(ctx, "error rolling back: %+v", err) 66 } 67 } 68 }() 69 // Run two inserts in a transaction to ensure that we have 70 // pipelined writes that cannot be retried at the SQL layer 71 // due to the first-statement rule. 72 if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 1); err != nil { 73 return err 74 } 75 if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 2); err != nil { 76 return err 77 } 78 if err := tx.Commit(); err != nil { 79 return err 80 } 81 tx = nil 82 } 83 return nil 84 }() 85 }() 86 87 // TODO(bdarnell): This test reliably reproduced the issue when 88 // introduced, because table creation causes splits and repeated 89 // table creation leads to lease transfers due to rebalancing. 90 // This is a subtle thing to rely on and the test might become 91 // more reliable if we ran more iterations in the worker goroutine 92 // and added a second goroutine to explicitly transfer leases back 93 // and forth. 94 95 select { 96 case <-time.After(15 * time.Second): 97 // TODO(bdarnell): The test seems flaky under stress with a 5s 98 // timeout. Why? I'm giving it a high timeout since hanging 99 // isn't a failure mode we're particularly concerned about here, 100 // but it shouldn't be taking this long even with stress. 101 t.Fatal("timed out") 102 case err := <-workerErrCh: 103 if err != nil { 104 t.Fatalf("worker failed: %+v", err) 105 } 106 } 107 } 108 } 109 110 func TestLeaseCommandLearnerReplica(t *testing.T) { 111 defer leaktest.AfterTest(t)() 112 113 ctx := context.Background() 114 const voterStoreID, learnerStoreID roachpb.StoreID = 1, 2 115 replicas := []roachpb.ReplicaDescriptor{ 116 {NodeID: 1, StoreID: voterStoreID, Type: roachpb.ReplicaTypeVoterFull(), ReplicaID: 1}, 117 {NodeID: 2, StoreID: learnerStoreID, Type: roachpb.ReplicaTypeLearner(), ReplicaID: 2}, 118 } 119 desc := roachpb.RangeDescriptor{} 120 desc.SetReplicas(roachpb.MakeReplicaDescriptors(replicas)) 121 cArgs := CommandArgs{ 122 EvalCtx: (&MockEvalCtx{ 123 StoreID: voterStoreID, 124 Desc: &desc, 125 }).EvalContext(), 126 Args: &roachpb.TransferLeaseRequest{ 127 Lease: roachpb.Lease{ 128 Replica: replicas[1], 129 }, 130 }, 131 } 132 133 // Learners are not allowed to become leaseholders for now, see the comments 134 // in TransferLease and RequestLease. 135 _, err := TransferLease(ctx, nil, cArgs, nil) 136 require.EqualError(t, err, `replica (n2,s2):2LEARNER of type LEARNER cannot hold lease`) 137 138 cArgs.Args = &roachpb.RequestLeaseRequest{} 139 _, err = RequestLease(ctx, nil, cArgs, nil) 140 141 const expForUnknown = `cannot replace lease repl=(n0,s0):? seq=0 start=0,0 exp=<nil> ` + 142 `with repl=(n0,s0):? seq=0 start=0,0 exp=<nil>: ` + 143 `replica (n0,s0):? not found in r0:{-} [(n1,s1):1, (n2,s2):2LEARNER, next=0, gen=0]` 144 require.EqualError(t, err, expForUnknown) 145 146 cArgs.Args = &roachpb.RequestLeaseRequest{ 147 Lease: roachpb.Lease{ 148 Replica: replicas[1], 149 }, 150 } 151 _, err = RequestLease(ctx, nil, cArgs, nil) 152 153 const expForLearner = `cannot replace lease repl=(n0,s0):? seq=0 start=0,0 exp=<nil> ` + 154 `with repl=(n2,s2):2LEARNER seq=0 start=0,0 exp=<nil>: ` + 155 `replica (n2,s2):2LEARNER of type LEARNER cannot hold lease` 156 require.EqualError(t, err, expForLearner) 157 }