github.com/decred/dcrlnd@v0.7.6/lntest/itest/lnd_etcd_failover_test.go (about) 1 //go:build kvdb_etcd 2 // +build kvdb_etcd 3 4 package itest 5 6 import ( 7 "context" 8 "io/ioutil" 9 "testing" 10 "time" 11 12 "github.com/decred/dcrd/dcrutil/v4" 13 "github.com/decred/dcrlnd/cluster" 14 "github.com/decred/dcrlnd/kvdb" 15 "github.com/decred/dcrlnd/lncfg" 16 "github.com/decred/dcrlnd/lnrpc" 17 "github.com/decred/dcrlnd/lnrpc/routerrpc" 18 "github.com/decred/dcrlnd/lntest" 19 ) 20 21 func assertLeader(ht *harnessTest, observer cluster.LeaderElector, 22 expected string) { 23 24 leader, err := observer.Leader(context.Background()) 25 if err != nil { 26 ht.Fatalf("Unable to query leader: %v", err) 27 } 28 29 if leader != expected { 30 ht.Fatalf("Leader should be '%v', got: '%v'", expected, leader) 31 } 32 } 33 34 // testEtcdFailover tests that in a cluster setup where two LND nodes form a 35 // single cluster (sharing the same identity) one can hand over the leader role 36 // to the other (failing over after graceful shutdown or forceful abort). 37 func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) { 38 testCases := []struct { 39 name string 40 kill bool 41 }{{ 42 name: "failover after shutdown", 43 kill: false, 44 }, { 45 name: "failover after abort", 46 kill: true, 47 }} 48 49 for _, test := range testCases { 50 test := test 51 52 ht.t.Run(test.name, func(t1 *testing.T) { 53 ht1 := newHarnessTest(t1, ht.lndHarness) 54 ht1.RunTestCase(&testCase{ 55 name: test.name, 56 test: func(_ *lntest.NetworkHarness, 57 tt *harnessTest) { 58 59 testEtcdFailoverCase(net, tt, test.kill) 60 }, 61 }) 62 }) 63 } 64 } 65 66 func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest, 67 kill bool) { 68 69 ctxb := context.Background() 70 71 tmpDir, err := ioutil.TempDir("", "etcd") 72 etcdCfg, cleanup, err := kvdb.StartEtcdTestBackend( 73 tmpDir, uint16(lntest.NextAvailablePort()), 74 uint16(lntest.NextAvailablePort()), "", 75 ) 76 if err != nil { 77 ht.Fatalf("Failed to start etcd instance: %v", err) 78 } 79 defer cleanup() 80 81 // Make leader election session TTL 5 sec to make the test run fast. 82 const leaderSessionTTL = 5 83 84 observer, err := cluster.MakeLeaderElector( 85 ctxb, cluster.EtcdLeaderElector, "observer", 86 lncfg.DefaultEtcdElectionPrefix, leaderSessionTTL, etcdCfg, 87 ) 88 if err != nil { 89 ht.Fatalf("Cannot start election observer: %v", err) 90 } 91 92 password := []byte("the quick brown fox jumps the lazy dog") 93 entropy := [16]byte{1, 2, 3} 94 stateless := false 95 cluster := true 96 97 carol1, _, _, err := net.NewNodeWithSeedEtcd( 98 "Carol-1", etcdCfg, password, entropy[:], stateless, cluster, 99 leaderSessionTTL, 100 ) 101 if err != nil { 102 ht.Fatalf("unable to start Carol-1: %v", err) 103 } 104 105 ctxt, _ := context.WithTimeout(ctxb, defaultTimeout) 106 info1, err := carol1.GetInfo(ctxt, &lnrpc.GetInfoRequest{}) 107 108 net.ConnectNodes(ht.t, carol1, net.Alice) 109 110 // Open a channel with 100k satoshis between Carol and Alice with Alice 111 // being the sole funder of the channel. 112 chanAmt := dcrutil.Amount(100000) 113 _ = openChannelAndAssert( 114 ht, net, net.Alice, carol1, 115 lntest.OpenChannelParams{ 116 Amt: chanAmt, 117 }, 118 ) 119 120 // At this point Carol-1 is the elected leader, while Carol-2 will wait 121 // to become the leader when Carol-1 stops. 122 carol2, err := net.NewNodeEtcd( 123 "Carol-2", etcdCfg, password, cluster, false, leaderSessionTTL, 124 ) 125 if err != nil { 126 ht.Fatalf("Unable to start Carol-2: %v", err) 127 } 128 129 assertLeader(ht, observer, "Carol-1") 130 131 amt := dcrutil.Amount(1000) 132 payReqs, _, _, err := createPayReqs(carol1, amt, 2) 133 if err != nil { 134 ht.Fatalf("Carol-2 is unable to create payment requests: %v", 135 err) 136 } 137 sendAndAssertSuccess( 138 ht, net.Alice, &routerrpc.SendPaymentRequest{ 139 PaymentRequest: payReqs[0], 140 TimeoutSeconds: 60, 141 FeeLimitAtoms: noFeeLimitMAtoms, 142 }, 143 ) 144 145 // Shut down or kill Carol-1 and wait for Carol-2 to become the leader. 146 failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second 147 if kill { 148 err = net.KillNode(carol1) 149 if err != nil { 150 ht.Fatalf("Can't kill Carol-1: %v", err) 151 } 152 153 failoverTimeout = 2 * time.Minute 154 155 } else { 156 shutdownAndAssert(net, ht, carol1) 157 } 158 159 err = carol2.WaitUntilLeader(failoverTimeout) 160 if err != nil { 161 ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v", 162 err) 163 } 164 165 assertLeader(ht, observer, "Carol-2") 166 167 err = carol2.Unlock(&lnrpc.UnlockWalletRequest{ 168 WalletPassword: password, 169 }) 170 if err != nil { 171 ht.Fatalf("Unlocking Carol-2 was not successful: %v", err) 172 } 173 174 ctxt, _ = context.WithTimeout(ctxb, defaultTimeout) 175 176 // Make sure Carol-1 and Carol-2 have the same identity. 177 info2, err := carol2.GetInfo(ctxt, &lnrpc.GetInfoRequest{}) 178 if info1.IdentityPubkey != info2.IdentityPubkey { 179 ht.Fatalf("Carol-1 and Carol-2 must have the same identity: "+ 180 "%v vs %v", info1.IdentityPubkey, info2.IdentityPubkey) 181 } 182 183 // Now let Alice pay the second invoice but this time we expect Carol-2 184 // to receive the payment. 185 sendAndAssertSuccess( 186 ht, net.Alice, &routerrpc.SendPaymentRequest{ 187 PaymentRequest: payReqs[1], 188 TimeoutSeconds: 60, 189 FeeLimitAtoms: noFeeLimitMAtoms, 190 }, 191 ) 192 193 shutdownAndAssert(net, ht, carol2) 194 }