get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/server/raft_test.go (about)

     1  // Copyright 2021-2023 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package server
    15  
    16  import (
    17  	"math"
    18  	"math/rand"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/nats-io/nats.go"
    23  )
    24  
    25  func TestNRGSimple(t *testing.T) {
    26  	c := createJetStreamClusterExplicit(t, "R3S", 3)
    27  	defer c.shutdown()
    28  
    29  	rg := c.createRaftGroup("TEST", 3, newStateAdder)
    30  	rg.waitOnLeader()
    31  	// Do several state transitions.
    32  	rg.randomMember().(*stateAdder).proposeDelta(11)
    33  	rg.randomMember().(*stateAdder).proposeDelta(11)
    34  	rg.randomMember().(*stateAdder).proposeDelta(-22)
    35  	// Wait for all members to have the correct state.
    36  	rg.waitOnTotal(t, 0)
    37  }
    38  
    39  func TestNRGSnapshotAndRestart(t *testing.T) {
    40  	c := createJetStreamClusterExplicit(t, "R3S", 3)
    41  	defer c.shutdown()
    42  
    43  	rg := c.createRaftGroup("TEST", 3, newStateAdder)
    44  	rg.waitOnLeader()
    45  
    46  	var expectedTotal int64
    47  
    48  	leader := rg.leader().(*stateAdder)
    49  	sm := rg.nonLeader().(*stateAdder)
    50  
    51  	for i := 0; i < 1000; i++ {
    52  		delta := rand.Int63n(222)
    53  		expectedTotal += delta
    54  		leader.proposeDelta(delta)
    55  
    56  		if i == 250 {
    57  			// Let some things catchup.
    58  			time.Sleep(50 * time.Millisecond)
    59  			// Snapshot leader and stop and snapshot a member.
    60  			leader.snapshot(t)
    61  			sm.snapshot(t)
    62  			sm.stop()
    63  		}
    64  	}
    65  	// Restart.
    66  	sm.restart()
    67  	// Wait for all members to have the correct state.
    68  	rg.waitOnTotal(t, expectedTotal)
    69  }
    70  
    71  func TestNRGAppendEntryEncode(t *testing.T) {
    72  	ae := &appendEntry{
    73  		term:   1,
    74  		pindex: 0,
    75  	}
    76  
    77  	// Test leader should be _EMPTY_ or exactly idLen long
    78  	ae.leader = "foo_bar_baz"
    79  	_, err := ae.encode(nil)
    80  	require_Error(t, err, errLeaderLen)
    81  
    82  	// Empty ok (noLeader)
    83  	ae.leader = noLeader // _EMPTY_
    84  	_, err = ae.encode(nil)
    85  	require_NoError(t, err)
    86  
    87  	ae.leader = "DEREK123"
    88  	_, err = ae.encode(nil)
    89  	require_NoError(t, err)
    90  
    91  	// Buffer reuse
    92  	var rawSmall [32]byte
    93  	var rawBigger [64]byte
    94  
    95  	b := rawSmall[:]
    96  	ae.encode(b)
    97  	if b[0] != 0 {
    98  		t.Fatalf("Expected arg buffer to not be used")
    99  	}
   100  	b = rawBigger[:]
   101  	ae.encode(b)
   102  	if b[0] == 0 {
   103  		t.Fatalf("Expected arg buffer to be used")
   104  	}
   105  
   106  	// Test max number of entries.
   107  	for i := 0; i < math.MaxUint16+1; i++ {
   108  		ae.entries = append(ae.entries, &Entry{EntryNormal, nil})
   109  	}
   110  	_, err = ae.encode(b)
   111  	require_Error(t, err, errTooManyEntries)
   112  }
   113  
   114  func TestNRGAppendEntryDecode(t *testing.T) {
   115  	ae := &appendEntry{
   116  		leader: "12345678",
   117  		term:   1,
   118  		pindex: 0,
   119  	}
   120  	for i := 0; i < math.MaxUint16; i++ {
   121  		ae.entries = append(ae.entries, &Entry{EntryNormal, nil})
   122  	}
   123  	buf, err := ae.encode(nil)
   124  	require_NoError(t, err)
   125  
   126  	// Truncate buffer first.
   127  	var node *raft
   128  	short := buf[0 : len(buf)-1024]
   129  	_, err = node.decodeAppendEntry(short, nil, _EMPTY_)
   130  	require_Error(t, err, errBadAppendEntry)
   131  
   132  	for i := 0; i < 100; i++ {
   133  		b := copyBytes(buf)
   134  		// modifying the header (idx < 42) will not result in an error by decodeAppendEntry
   135  		bi := 42 + rand.Intn(len(b)-42)
   136  		if b[bi] != 0 && bi != 40 {
   137  			b[bi] = 0
   138  			_, err = node.decodeAppendEntry(b, nil, _EMPTY_)
   139  			require_Error(t, err, errBadAppendEntry)
   140  		}
   141  	}
   142  }
   143  
   144  func TestNRGRecoverFromFollowingNoLeader(t *testing.T) {
   145  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   146  	defer c.shutdown()
   147  
   148  	rg := c.createRaftGroup("TEST", 3, newStateAdder)
   149  	rg.waitOnLeader()
   150  
   151  	// Find out what term we are on.
   152  	term := rg.leader().node().Term()
   153  
   154  	// Start by pausing all of the nodes. This will stop them from
   155  	// processing new entries.
   156  	for _, n := range rg {
   157  		n.node().PauseApply()
   158  	}
   159  
   160  	// Now drain all of the ApplyQ entries from them, which will stop
   161  	// them from automatically trying to follow a previous leader if
   162  	// they happened to have received an apply entry from one. Then
   163  	// we're going to force them into a state where they are all
   164  	// followers but they don't have a leader.
   165  	for _, n := range rg {
   166  		rn := n.node().(*raft)
   167  		rn.ApplyQ().drain()
   168  		rn.switchToFollower("")
   169  	}
   170  
   171  	// Resume the nodes.
   172  	for _, n := range rg {
   173  		n.node().ResumeApply()
   174  	}
   175  
   176  	// Wait a while. The nodes should notice that they haven't heard
   177  	// from a leader lately and will switch to voting. After an
   178  	// election we should find a new leader and be on a new term.
   179  	rg.waitOnLeader()
   180  	require_True(t, rg.leader() != nil)
   181  	require_NotEqual(t, rg.leader().node().Term(), term)
   182  }
   183  
   184  func TestNRGObserverMode(t *testing.T) {
   185  	c := createJetStreamClusterExplicit(t, "R3S", 3)
   186  	defer c.shutdown()
   187  
   188  	rg := c.createRaftGroup("TEST", 3, newStateAdder)
   189  	rg.waitOnLeader()
   190  
   191  	// Put all of the followers into observer mode. In this state
   192  	// they will not participate in an election but they will continue
   193  	// to apply incoming commits.
   194  	for _, n := range rg {
   195  		if n.node().Leader() {
   196  			continue
   197  		}
   198  		n.node().SetObserver(true)
   199  	}
   200  
   201  	// Propose a change from the leader.
   202  	adder := rg.leader().(*stateAdder)
   203  	adder.proposeDelta(1)
   204  	adder.proposeDelta(2)
   205  	adder.proposeDelta(3)
   206  
   207  	// Wait for the followers to apply it.
   208  	rg.waitOnTotal(t, 6)
   209  
   210  	// Confirm the followers are still just observers and weren't
   211  	// reset out of that state for some reason.
   212  	for _, n := range rg {
   213  		if n.node().Leader() {
   214  			continue
   215  		}
   216  		require_True(t, n.node().IsObserver())
   217  	}
   218  }
   219  
   220  // TestNRGSimpleElection tests that a simple election succeeds. It is
   221  // simple because the group hasn't processed any entries and hasn't
   222  // suffered any interruptions of any kind, therefore there should be
   223  // no way that the conditions for granting the votes can fail.
   224  func TestNRGSimpleElection(t *testing.T) {
   225  	c := createJetStreamClusterExplicit(t, "R3S", 9)
   226  	defer c.shutdown()
   227  	c.waitOnLeader()
   228  
   229  	nc, _ := jsClientConnect(t, c.leader(), nats.UserInfo("admin", "s3cr3t!"))
   230  	defer nc.Close()
   231  
   232  	rg := c.createRaftGroup("TEST", 9, newStateAdder)
   233  	rg.waitOnLeader()
   234  
   235  	voteReqs := make(chan *nats.Msg, 1)
   236  	voteResps := make(chan *nats.Msg, len(rg)-1)
   237  
   238  	// Keep a record of the term when we started.
   239  	leader := rg.leader().node().(*raft)
   240  	startTerm := leader.term
   241  
   242  	// Subscribe to the vote request subject, this should be the
   243  	// same across all nodes in the group.
   244  	_, err := nc.ChanSubscribe(leader.vsubj, voteReqs)
   245  	require_NoError(t, err)
   246  
   247  	// Subscribe to all of the vote response inboxes for all nodes
   248  	// in the Raft group, as they can differ.
   249  	for _, n := range rg {
   250  		rn := n.node().(*raft)
   251  		_, err = nc.ChanSubscribe(rn.vreply, voteResps)
   252  		require_NoError(t, err)
   253  	}
   254  
   255  	// Step down, this will start a new voting session.
   256  	require_NoError(t, rg.leader().node().StepDown())
   257  
   258  	// Wait for a vote request to come in.
   259  	msg := require_ChanRead(t, voteReqs, time.Second)
   260  	vr := decodeVoteRequest(msg.Data, msg.Reply)
   261  	require_True(t, vr != nil)
   262  	require_NotEqual(t, vr.candidate, "")
   263  
   264  	// The leader should have bumped their term in order to start
   265  	// an election.
   266  	require_Equal(t, vr.term, startTerm+1)
   267  	require_Equal(t, vr.lastTerm, startTerm)
   268  
   269  	// Wait for all of the vote responses to come in. There should
   270  	// be as many vote responses as there are followers.
   271  	for i := 0; i < len(rg)-1; i++ {
   272  		msg := require_ChanRead(t, voteResps, time.Second)
   273  		re := decodeVoteResponse(msg.Data)
   274  		require_True(t, re != nil)
   275  
   276  		// The vote should have been granted.
   277  		require_Equal(t, re.granted, true)
   278  
   279  		// The node granted the vote, therefore the term in the vote
   280  		// response should have advanced as well.
   281  		require_Equal(t, re.term, vr.term)
   282  		require_Equal(t, re.term, startTerm+1)
   283  	}
   284  
   285  	// Everyone in the group should have voted for our candidate
   286  	// and arrived at the term from the vote request.
   287  	for _, n := range rg {
   288  		rn := n.node().(*raft)
   289  		require_Equal(t, rn.term, vr.term)
   290  		require_Equal(t, rn.term, startTerm+1)
   291  		require_Equal(t, rn.vote, vr.candidate)
   292  	}
   293  }