github.com/decred/dcrlnd@v0.7.6/lntest/itest/lnd_etcd_failover_test.go (about)

     1  //go:build kvdb_etcd
     2  // +build kvdb_etcd
     3  
     4  package itest
     5  
     6  import (
     7  	"context"
     8  	"io/ioutil"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/decred/dcrd/dcrutil/v4"
    13  	"github.com/decred/dcrlnd/cluster"
    14  	"github.com/decred/dcrlnd/kvdb"
    15  	"github.com/decred/dcrlnd/lncfg"
    16  	"github.com/decred/dcrlnd/lnrpc"
    17  	"github.com/decred/dcrlnd/lnrpc/routerrpc"
    18  	"github.com/decred/dcrlnd/lntest"
    19  )
    20  
    21  func assertLeader(ht *harnessTest, observer cluster.LeaderElector,
    22  	expected string) {
    23  
    24  	leader, err := observer.Leader(context.Background())
    25  	if err != nil {
    26  		ht.Fatalf("Unable to query leader: %v", err)
    27  	}
    28  
    29  	if leader != expected {
    30  		ht.Fatalf("Leader should be '%v', got: '%v'", expected, leader)
    31  	}
    32  }
    33  
    34  // testEtcdFailover tests that in a cluster setup where two LND nodes form a
    35  // single cluster (sharing the same identity) one can hand over the leader role
    36  // to the other (failing over after graceful shutdown or forceful abort).
    37  func testEtcdFailover(net *lntest.NetworkHarness, ht *harnessTest) {
    38  	testCases := []struct {
    39  		name string
    40  		kill bool
    41  	}{{
    42  		name: "failover after shutdown",
    43  		kill: false,
    44  	}, {
    45  		name: "failover after abort",
    46  		kill: true,
    47  	}}
    48  
    49  	for _, test := range testCases {
    50  		test := test
    51  
    52  		ht.t.Run(test.name, func(t1 *testing.T) {
    53  			ht1 := newHarnessTest(t1, ht.lndHarness)
    54  			ht1.RunTestCase(&testCase{
    55  				name: test.name,
    56  				test: func(_ *lntest.NetworkHarness,
    57  					tt *harnessTest) {
    58  
    59  					testEtcdFailoverCase(net, tt, test.kill)
    60  				},
    61  			})
    62  		})
    63  	}
    64  }
    65  
    66  func testEtcdFailoverCase(net *lntest.NetworkHarness, ht *harnessTest,
    67  	kill bool) {
    68  
    69  	ctxb := context.Background()
    70  
    71  	tmpDir, err := ioutil.TempDir("", "etcd")
    72  	etcdCfg, cleanup, err := kvdb.StartEtcdTestBackend(
    73  		tmpDir, uint16(lntest.NextAvailablePort()),
    74  		uint16(lntest.NextAvailablePort()), "",
    75  	)
    76  	if err != nil {
    77  		ht.Fatalf("Failed to start etcd instance: %v", err)
    78  	}
    79  	defer cleanup()
    80  
    81  	// Make leader election session TTL 5 sec to make the test run fast.
    82  	const leaderSessionTTL = 5
    83  
    84  	observer, err := cluster.MakeLeaderElector(
    85  		ctxb, cluster.EtcdLeaderElector, "observer",
    86  		lncfg.DefaultEtcdElectionPrefix, leaderSessionTTL, etcdCfg,
    87  	)
    88  	if err != nil {
    89  		ht.Fatalf("Cannot start election observer: %v", err)
    90  	}
    91  
    92  	password := []byte("the quick brown fox jumps the lazy dog")
    93  	entropy := [16]byte{1, 2, 3}
    94  	stateless := false
    95  	cluster := true
    96  
    97  	carol1, _, _, err := net.NewNodeWithSeedEtcd(
    98  		"Carol-1", etcdCfg, password, entropy[:], stateless, cluster,
    99  		leaderSessionTTL,
   100  	)
   101  	if err != nil {
   102  		ht.Fatalf("unable to start Carol-1: %v", err)
   103  	}
   104  
   105  	ctxt, _ := context.WithTimeout(ctxb, defaultTimeout)
   106  	info1, err := carol1.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
   107  
   108  	net.ConnectNodes(ht.t, carol1, net.Alice)
   109  
   110  	// Open a channel with 100k satoshis between Carol and Alice with Alice
   111  	// being the sole funder of the channel.
   112  	chanAmt := dcrutil.Amount(100000)
   113  	_ = openChannelAndAssert(
   114  		ht, net, net.Alice, carol1,
   115  		lntest.OpenChannelParams{
   116  			Amt: chanAmt,
   117  		},
   118  	)
   119  
   120  	// At this point Carol-1 is the elected leader, while Carol-2 will wait
   121  	// to become the leader when Carol-1 stops.
   122  	carol2, err := net.NewNodeEtcd(
   123  		"Carol-2", etcdCfg, password, cluster, false, leaderSessionTTL,
   124  	)
   125  	if err != nil {
   126  		ht.Fatalf("Unable to start Carol-2: %v", err)
   127  	}
   128  
   129  	assertLeader(ht, observer, "Carol-1")
   130  
   131  	amt := dcrutil.Amount(1000)
   132  	payReqs, _, _, err := createPayReqs(carol1, amt, 2)
   133  	if err != nil {
   134  		ht.Fatalf("Carol-2 is unable to create payment requests: %v",
   135  			err)
   136  	}
   137  	sendAndAssertSuccess(
   138  		ht, net.Alice, &routerrpc.SendPaymentRequest{
   139  			PaymentRequest: payReqs[0],
   140  			TimeoutSeconds: 60,
   141  			FeeLimitAtoms:  noFeeLimitMAtoms,
   142  		},
   143  	)
   144  
   145  	// Shut down or kill Carol-1 and wait for Carol-2 to become the leader.
   146  	failoverTimeout := time.Duration(2*leaderSessionTTL) * time.Second
   147  	if kill {
   148  		err = net.KillNode(carol1)
   149  		if err != nil {
   150  			ht.Fatalf("Can't kill Carol-1: %v", err)
   151  		}
   152  
   153  		failoverTimeout = 2 * time.Minute
   154  
   155  	} else {
   156  		shutdownAndAssert(net, ht, carol1)
   157  	}
   158  
   159  	err = carol2.WaitUntilLeader(failoverTimeout)
   160  	if err != nil {
   161  		ht.Fatalf("Waiting for Carol-2 to become the leader failed: %v",
   162  			err)
   163  	}
   164  
   165  	assertLeader(ht, observer, "Carol-2")
   166  
   167  	err = carol2.Unlock(&lnrpc.UnlockWalletRequest{
   168  		WalletPassword: password,
   169  	})
   170  	if err != nil {
   171  		ht.Fatalf("Unlocking Carol-2 was not successful: %v", err)
   172  	}
   173  
   174  	ctxt, _ = context.WithTimeout(ctxb, defaultTimeout)
   175  
   176  	// Make sure Carol-1 and Carol-2 have the same identity.
   177  	info2, err := carol2.GetInfo(ctxt, &lnrpc.GetInfoRequest{})
   178  	if info1.IdentityPubkey != info2.IdentityPubkey {
   179  		ht.Fatalf("Carol-1 and Carol-2 must have the same identity: "+
   180  			"%v vs %v", info1.IdentityPubkey, info2.IdentityPubkey)
   181  	}
   182  
   183  	// Now let Alice pay the second invoice but this time we expect Carol-2
   184  	// to receive the payment.
   185  	sendAndAssertSuccess(
   186  		ht, net.Alice, &routerrpc.SendPaymentRequest{
   187  			PaymentRequest: payReqs[1],
   188  			TimeoutSeconds: 60,
   189  			FeeLimitAtoms:  noFeeLimitMAtoms,
   190  		},
   191  	)
   192  
   193  	shutdownAndAssert(net, ht, carol2)
   194  }