github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/serf_test.go (about)

     1  package nomad
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path"
     8  	"strings"
     9  	"sync/atomic"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/hashicorp/nomad/testutil"
    14  	"github.com/hashicorp/serf/serf"
    15  	"github.com/stretchr/testify/require"
    16  )
    17  
    18  func TestNomad_JoinPeer(t *testing.T) {
    19  	t.Parallel()
    20  
    21  	s1, cleanupS1 := TestServer(t, nil)
    22  	defer cleanupS1()
    23  	s2, cleanupS2 := TestServer(t, func(c *Config) {
    24  		c.Region = "region2"
    25  	})
    26  	defer cleanupS2()
    27  	TestJoin(t, s1, s2)
    28  
    29  	testutil.WaitForResult(func() (bool, error) {
    30  		if members := s1.Members(); len(members) != 2 {
    31  			return false, fmt.Errorf("bad: %#v", members)
    32  		}
    33  		if members := s2.Members(); len(members) != 2 {
    34  			return false, fmt.Errorf("bad: %#v", members)
    35  		}
    36  		return true, nil
    37  	}, func(err error) {
    38  		t.Fatalf("err: %v", err)
    39  	})
    40  
    41  	testutil.WaitForResult(func() (bool, error) {
    42  		if len(s1.peers) != 2 {
    43  			return false, fmt.Errorf("bad: %#v", s1.peers)
    44  		}
    45  		if len(s2.peers) != 2 {
    46  			return false, fmt.Errorf("bad: %#v", s2.peers)
    47  		}
    48  		if len(s1.localPeers) != 1 {
    49  			return false, fmt.Errorf("bad: %#v", s1.localPeers)
    50  		}
    51  		if len(s2.localPeers) != 1 {
    52  			return false, fmt.Errorf("bad: %#v", s2.localPeers)
    53  		}
    54  		return true, nil
    55  	}, func(err error) {
    56  		t.Fatalf("err: %v", err)
    57  	})
    58  }
    59  
    60  func TestNomad_RemovePeer(t *testing.T) {
    61  	t.Parallel()
    62  
    63  	s1, cleanupS1 := TestServer(t, nil)
    64  	defer cleanupS1()
    65  	s2, cleanupS2 := TestServer(t, func(c *Config) {
    66  		c.Region = "global"
    67  	})
    68  	defer cleanupS2()
    69  	TestJoin(t, s1, s2)
    70  
    71  	testutil.WaitForResult(func() (bool, error) {
    72  		if members := s1.Members(); len(members) != 2 {
    73  			return false, fmt.Errorf("bad: %#v", members)
    74  		}
    75  		if members := s2.Members(); len(members) != 2 {
    76  			return false, fmt.Errorf("bad: %#v", members)
    77  		}
    78  		return true, nil
    79  	}, func(err error) {
    80  		t.Fatalf("err: %v", err)
    81  	})
    82  
    83  	// Leave immediately
    84  	s2.Leave()
    85  	s2.Shutdown()
    86  
    87  	testutil.WaitForResult(func() (bool, error) {
    88  		if len(s1.peers) != 1 {
    89  			return false, fmt.Errorf("bad: %#v", s1.peers)
    90  		}
    91  		if len(s2.peers) != 1 {
    92  			return false, fmt.Errorf("bad: %#v", s2.peers)
    93  		}
    94  		return true, nil
    95  	}, func(err error) {
    96  		t.Fatalf("err: %v", err)
    97  	})
    98  }
    99  
   100  func TestNomad_ReapPeer(t *testing.T) {
   101  	t.Parallel()
   102  
   103  	dir := tmpDir(t)
   104  	defer os.RemoveAll(dir)
   105  
   106  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   107  		c.NodeName = "node1"
   108  		c.BootstrapExpect = 3
   109  		c.DevMode = false
   110  		c.DataDir = path.Join(dir, "node1")
   111  	})
   112  	defer cleanupS1()
   113  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   114  		c.NodeName = "node2"
   115  		c.BootstrapExpect = 3
   116  		c.DevMode = false
   117  		c.DataDir = path.Join(dir, "node2")
   118  	})
   119  	defer cleanupS2()
   120  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   121  		c.NodeName = "node3"
   122  		c.BootstrapExpect = 3
   123  		c.DevMode = false
   124  		c.DataDir = path.Join(dir, "node3")
   125  	})
   126  	defer cleanupS3()
   127  	TestJoin(t, s1, s2, s3)
   128  
   129  	testutil.WaitForResult(func() (bool, error) {
   130  		// Retry the join to decrease flakiness
   131  		TestJoin(t, s1, s2, s3)
   132  		if members := s1.Members(); len(members) != 3 {
   133  			return false, fmt.Errorf("bad s1: %#v", members)
   134  		}
   135  		if members := s2.Members(); len(members) != 3 {
   136  			return false, fmt.Errorf("bad s2: %#v", members)
   137  		}
   138  		if members := s3.Members(); len(members) != 3 {
   139  			return false, fmt.Errorf("bad s3: %#v", members)
   140  		}
   141  		return true, nil
   142  	}, func(err error) {
   143  		t.Fatalf("err: %v", err)
   144  	})
   145  
   146  	testutil.WaitForLeader(t, s1.RPC)
   147  
   148  	// Simulate a reap
   149  	mems := s1.Members()
   150  	var s2mem serf.Member
   151  	for _, m := range mems {
   152  		if strings.Contains(m.Name, s2.config.NodeName) {
   153  			s2mem = m
   154  			s2mem.Status = StatusReap
   155  			break
   156  		}
   157  	}
   158  
   159  	// Shutdown and then send the reap
   160  	s2.Shutdown()
   161  	s1.reconcileCh <- s2mem
   162  	s2.reconcileCh <- s2mem
   163  	s3.reconcileCh <- s2mem
   164  
   165  	testutil.WaitForResult(func() (bool, error) {
   166  		if len(s1.peers["global"]) != 2 {
   167  			return false, fmt.Errorf("bad: %#v", s1.peers["global"])
   168  		}
   169  		peers, err := s1.numPeers()
   170  		if err != nil {
   171  			return false, fmt.Errorf("numPeers() failed: %v", err)
   172  		}
   173  		if peers != 2 {
   174  			return false, fmt.Errorf("bad: %#v", peers)
   175  		}
   176  
   177  		if len(s3.peers["global"]) != 2 {
   178  			return false, fmt.Errorf("bad: %#v", s1.peers["global"])
   179  		}
   180  		peers, err = s3.numPeers()
   181  		if err != nil {
   182  			return false, fmt.Errorf("numPeers() failed: %v", err)
   183  		}
   184  		if peers != 2 {
   185  			return false, fmt.Errorf("bad: %#v", peers)
   186  		}
   187  		return true, nil
   188  	}, func(err error) {
   189  		t.Fatalf("err: %v", err)
   190  	})
   191  }
   192  
   193  func TestNomad_BootstrapExpect(t *testing.T) {
   194  	t.Parallel()
   195  
   196  	dir := tmpDir(t)
   197  	defer os.RemoveAll(dir)
   198  
   199  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   200  		c.BootstrapExpect = 3
   201  		c.DevMode = false
   202  		c.DataDir = path.Join(dir, "node1")
   203  	})
   204  	defer cleanupS1()
   205  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   206  		c.BootstrapExpect = 3
   207  		c.DevMode = false
   208  		c.DataDir = path.Join(dir, "node2")
   209  	})
   210  	defer cleanupS2()
   211  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   212  		c.BootstrapExpect = 3
   213  		c.DevMode = false
   214  		c.DataDir = path.Join(dir, "node3")
   215  	})
   216  	defer cleanupS3()
   217  	TestJoin(t, s1, s2, s3)
   218  
   219  	testutil.WaitForResult(func() (bool, error) {
   220  		// Retry the join to decrease flakiness
   221  		TestJoin(t, s1, s2, s3)
   222  		peers, err := s1.numPeers()
   223  		if err != nil {
   224  			return false, err
   225  		}
   226  		if peers != 3 {
   227  			return false, fmt.Errorf("bad: %#v", peers)
   228  		}
   229  		peers, err = s2.numPeers()
   230  		if err != nil {
   231  			return false, err
   232  		}
   233  		if peers != 3 {
   234  			return false, fmt.Errorf("bad: %#v", peers)
   235  		}
   236  		peers, err = s3.numPeers()
   237  		if err != nil {
   238  			return false, err
   239  		}
   240  		if peers != 3 {
   241  			return false, fmt.Errorf("bad: %#v", peers)
   242  		}
   243  		if len(s1.localPeers) != 3 {
   244  			return false, fmt.Errorf("bad: %#v", s1.localPeers)
   245  		}
   246  		if len(s2.localPeers) != 3 {
   247  			return false, fmt.Errorf("bad: %#v", s2.localPeers)
   248  		}
   249  		if len(s3.localPeers) != 3 {
   250  			return false, fmt.Errorf("bad: %#v", s3.localPeers)
   251  		}
   252  		return true, nil
   253  	}, func(err error) {
   254  		t.Fatalf("err: %v", err)
   255  	})
   256  
   257  	// Join a fourth server after quorum has already been formed and ensure
   258  	// there is no election
   259  	s4, cleanupS4 := TestServer(t, func(c *Config) {
   260  		c.BootstrapExpect = 3
   261  		c.DevMode = false
   262  		c.DataDir = path.Join(dir, "node4")
   263  	})
   264  	defer cleanupS4()
   265  
   266  	// Make sure a leader is elected, grab the current term and then add in
   267  	// the fourth server.
   268  	testutil.WaitForLeader(t, s1.RPC)
   269  	termBefore := s1.raft.Stats()["last_log_term"]
   270  
   271  	var addresses []string
   272  	for _, s := range []*Server{s1, s2, s3} {
   273  		addr := fmt.Sprintf("127.0.0.1:%d", s.config.SerfConfig.MemberlistConfig.BindPort)
   274  		addresses = append(addresses, addr)
   275  	}
   276  	if _, err := s4.Join(addresses); err != nil {
   277  		t.Fatalf("err: %v", err)
   278  	}
   279  
   280  	// Wait for the new server to see itself added to the cluster.
   281  	var p4 int
   282  	testutil.WaitForResult(func() (bool, error) {
   283  		// Retry join to reduce flakiness
   284  		if _, err := s4.Join(addresses); err != nil {
   285  			t.Fatalf("err: %v", err)
   286  		}
   287  		p4, _ = s4.numPeers()
   288  		return p4 == 4, errors.New(fmt.Sprintf("%d", p4))
   289  	}, func(err error) {
   290  		t.Fatalf("should have 4 peers: %v", err)
   291  	})
   292  
   293  	// Make sure there's still a leader and that the term didn't change,
   294  	// so we know an election didn't occur.
   295  	testutil.WaitForLeader(t, s1.RPC)
   296  	termAfter := s1.raft.Stats()["last_log_term"]
   297  	if termAfter != termBefore {
   298  		t.Fatalf("looks like an election took place")
   299  	}
   300  }
   301  
   302  func TestNomad_BootstrapExpect_NonVoter(t *testing.T) {
   303  	t.Parallel()
   304  
   305  	dir := tmpDir(t)
   306  	defer os.RemoveAll(dir)
   307  
   308  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   309  		c.BootstrapExpect = 2
   310  		c.DevMode = false
   311  		c.DataDir = path.Join(dir, "node1")
   312  		c.NonVoter = true
   313  	})
   314  	defer cleanupS1()
   315  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   316  		c.BootstrapExpect = 2
   317  		c.DevMode = false
   318  		c.DataDir = path.Join(dir, "node2")
   319  		c.NonVoter = true
   320  	})
   321  	defer cleanupS2()
   322  	s3, cleanupS3 := TestServer(t, func(c *Config) {
   323  		c.BootstrapExpect = 2
   324  		c.DevMode = false
   325  		c.DataDir = path.Join(dir, "node3")
   326  	})
   327  	defer cleanupS3()
   328  	TestJoin(t, s1, s2, s3)
   329  
   330  	// Assert that we do not bootstrap
   331  	testutil.AssertUntil(testutil.Timeout(time.Second), func() (bool, error) {
   332  		_, p := s1.getLeader()
   333  		if p != nil {
   334  			return false, fmt.Errorf("leader %v", p)
   335  		}
   336  
   337  		return true, nil
   338  	}, func(err error) {
   339  		t.Fatalf("should not have leader: %v", err)
   340  	})
   341  
   342  	// Add the fourth server that is a voter
   343  	s4, cleanupS4 := TestServer(t, func(c *Config) {
   344  		c.BootstrapExpect = 2
   345  		c.DevMode = false
   346  		c.DataDir = path.Join(dir, "node4")
   347  	})
   348  	defer cleanupS4()
   349  	TestJoin(t, s1, s2, s3, s4)
   350  
   351  	testutil.WaitForResult(func() (bool, error) {
   352  		// Retry the join to decrease flakiness
   353  		TestJoin(t, s1, s2, s3, s4)
   354  		peers, err := s1.numPeers()
   355  		if err != nil {
   356  			return false, err
   357  		}
   358  		if peers != 4 {
   359  			return false, fmt.Errorf("bad: %#v", peers)
   360  		}
   361  		peers, err = s2.numPeers()
   362  		if err != nil {
   363  			return false, err
   364  		}
   365  		if peers != 4 {
   366  			return false, fmt.Errorf("bad: %#v", peers)
   367  		}
   368  		peers, err = s3.numPeers()
   369  		if err != nil {
   370  			return false, err
   371  		}
   372  		if peers != 4 {
   373  			return false, fmt.Errorf("bad: %#v", peers)
   374  		}
   375  		peers, err = s4.numPeers()
   376  		if err != nil {
   377  			return false, err
   378  		}
   379  		if peers != 4 {
   380  			return false, fmt.Errorf("bad: %#v", peers)
   381  		}
   382  
   383  		if len(s1.localPeers) != 4 {
   384  			return false, fmt.Errorf("bad: %#v", s1.localPeers)
   385  		}
   386  		if len(s2.localPeers) != 4 {
   387  			return false, fmt.Errorf("bad: %#v", s2.localPeers)
   388  		}
   389  		if len(s3.localPeers) != 4 {
   390  			return false, fmt.Errorf("bad: %#v", s3.localPeers)
   391  		}
   392  		if len(s4.localPeers) != 4 {
   393  			return false, fmt.Errorf("bad: %#v", s3.localPeers)
   394  		}
   395  
   396  		_, p := s1.getLeader()
   397  		if p == nil {
   398  			return false, fmt.Errorf("no leader")
   399  		}
   400  		return true, nil
   401  	}, func(err error) {
   402  		t.Fatalf("err: %v", err)
   403  	})
   404  
   405  }
   406  
   407  func TestNomad_BadExpect(t *testing.T) {
   408  	t.Parallel()
   409  
   410  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   411  		c.BootstrapExpect = 2
   412  	})
   413  	defer cleanupS1()
   414  	s2, cleanupS2 := TestServer(t, func(c *Config) {
   415  		c.BootstrapExpect = 3
   416  	})
   417  	defer cleanupS2()
   418  	servers := []*Server{s1, s2}
   419  	TestJoin(t, s1, s2)
   420  
   421  	// Serf members should update
   422  	testutil.WaitForResult(func() (bool, error) {
   423  		for _, s := range servers {
   424  			members := s.Members()
   425  			if len(members) != 2 {
   426  				return false, fmt.Errorf("%d", len(members))
   427  			}
   428  		}
   429  		return true, nil
   430  	}, func(err error) {
   431  		t.Fatalf("should have 2 peers: %v", err)
   432  	})
   433  
   434  	// should still have no peers (because s2 is in expect=2 mode)
   435  	testutil.WaitForResult(func() (bool, error) {
   436  		for _, s := range servers {
   437  			p, _ := s.numPeers()
   438  			if p != 0 {
   439  				return false, fmt.Errorf("%d", p)
   440  			}
   441  		}
   442  		return true, nil
   443  	}, func(err error) {
   444  		t.Fatalf("should have 0 peers: %v", err)
   445  	})
   446  }
   447  
   448  // TestNomad_NonBootstraping_ShouldntBootstap asserts that if BootstrapExpect is zero,
   449  // the server shouldn't bootstrap
   450  func TestNomad_NonBootstraping_ShouldntBootstap(t *testing.T) {
   451  	t.Parallel()
   452  
   453  	dir := tmpDir(t)
   454  	defer os.RemoveAll(dir)
   455  
   456  	s1, cleanupS1 := TestServer(t, func(c *Config) {
   457  		c.BootstrapExpect = 0
   458  		c.DevMode = false
   459  		c.DataDir = path.Join(dir, "node")
   460  	})
   461  	defer cleanupS1()
   462  
   463  	testutil.WaitForResult(func() (bool, error) {
   464  		s1.peerLock.Lock()
   465  		p := len(s1.localPeers)
   466  		s1.peerLock.Unlock()
   467  		if p != 1 {
   468  			return false, fmt.Errorf("%d", p)
   469  		}
   470  
   471  		return true, nil
   472  	}, func(err error) {
   473  		t.Fatalf("expected 1 local peer: %v", err)
   474  	})
   475  
   476  	// as non-bootstrap mode is the initial state, we must wait long enough to assert that
   477  	// we don't bootstrap even if enough time has elapsed.  Also, explicitly attempt bootstrap.
   478  	s1.maybeBootstrap()
   479  	time.Sleep(100 * time.Millisecond)
   480  
   481  	bootstrapped := atomic.LoadInt32(&s1.config.Bootstrapped)
   482  	require.Zero(t, bootstrapped, "expecting non-bootstrapped servers")
   483  
   484  	p, _ := s1.numPeers()
   485  	require.Zero(t, p, "number of peers in Raft")
   486  
   487  }