github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/serf_test.go

package nomad

import (
	"fmt"
	"os"
	"path"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/hashicorp/nomad/testutil"
	"github.com/hashicorp/serf/serf"
	"github.com/stretchr/testify/require"
)

// TestNomad_JoinPeer asserts that a server joining from another region is
// tracked as a cross-region peer rather than a local peer.
func TestNomad_JoinPeer(t *testing.T) {
	t.Parallel()

	s1, cleanupS1 := TestServer(t, nil)
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.Region = "region2"
	})
	defer cleanupS2()
	TestJoin(t, s1, s2)

	testutil.WaitForResult(func() (bool, error) {
		if members := s1.Members(); len(members) != 2 {
			return false, fmt.Errorf("bad: %#v", members)
		}
		if members := s2.Members(); len(members) != 2 {
			return false, fmt.Errorf("bad: %#v", members)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	testutil.WaitForResult(func() (bool, error) {
		// Each server should know about both regions...
		if len(s1.peers) != 2 {
			return false, fmt.Errorf("bad: %#v", s1.peers)
		}
		if len(s2.peers) != 2 {
			return false, fmt.Errorf("bad: %#v", s2.peers)
		}
		// ...but count only itself as a local (same-region) peer.
		if len(s1.localPeers) != 1 {
			return false, fmt.Errorf("bad: %#v", s1.localPeers)
		}
		if len(s2.localPeers) != 1 {
			return false, fmt.Errorf("bad: %#v", s2.localPeers)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestNomad_RemovePeer asserts that a server that gracefully leaves is
// removed from the peer set.
func TestNomad_RemovePeer(t *testing.T) {
	t.Parallel()

	s1, cleanupS1 := TestServer(t, nil)
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.Region = "global"
	})
	defer cleanupS2()
	TestJoin(t, s1, s2)

	testutil.WaitForResult(func() (bool, error) {
		if members := s1.Members(); len(members) != 2 {
			return false, fmt.Errorf("bad: %#v", members)
		}
		if members := s2.Members(); len(members) != 2 {
			return false, fmt.Errorf("bad: %#v", members)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Leave immediately
	s2.Leave()
	s2.Shutdown()

	testutil.WaitForResult(func() (bool, error) {
		if len(s1.peers) != 1 {
			return false, fmt.Errorf("bad: %#v", s1.peers)
		}
		if len(s2.peers) != 1 {
			return false, fmt.Errorf("bad: %#v", s2.peers)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestNomad_ReapPeer asserts that a server reaped by Serf is removed from
// the Raft peer set of the remaining servers.
func TestNomad_ReapPeer(t *testing.T) {
	t.Parallel()

	dir := tmpDir(t)
	defer os.RemoveAll(dir)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NodeName = "node1"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
	})
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.NodeName = "node2"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
	})
	defer cleanupS2()
	s3, cleanupS3 := TestServer(t, func(c *Config) {
		c.NodeName = "node3"
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node3")
	})
	defer cleanupS3()
	TestJoin(t, s1, s2, s3)

	testutil.WaitForResult(func() (bool, error) {
		// Retry the join to decrease flakiness
		TestJoin(t, s1, s2, s3)
		if members := s1.Members(); len(members) != 3 {
			return false, fmt.Errorf("bad s1: %#v", members)
		}
		if members := s2.Members(); len(members) != 3 {
			return false, fmt.Errorf("bad s2: %#v", members)
		}
		if members := s3.Members(); len(members) != 3 {
			return false, fmt.Errorf("bad s3: %#v", members)
		}
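		// All three servers agree on membership; the gossip join has converged.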
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	testutil.WaitForLeader(t, s1.RPC)

	// Simulate a reap by flagging s2's member entry as StatusReap.
	mems := s1.Members()
	var s2mem serf.Member
	for _, m := range mems {
		if strings.Contains(m.Name, s2.config.NodeName) {
			s2mem = m
			s2mem.Status = StatusReap
			break
		}
	}

	// Shutdown and then send the reap
	s2.Shutdown()
	s1.reconcileCh <- s2mem
	s2.reconcileCh <- s2mem
	s3.reconcileCh <- s2mem

	testutil.WaitForResult(func() (bool, error) {
		if len(s1.peers["global"]) != 2 {
			return false, fmt.Errorf("bad: %#v", s1.peers["global"])
		}
		peers, err := s1.numPeers()
		if err != nil {
			return false, fmt.Errorf("numPeers() failed: %v", err)
		}
		if peers != 2 {
			return false, fmt.Errorf("bad: %#v", peers)
		}

		if len(s3.peers["global"]) != 2 {
			return false, fmt.Errorf("bad: %#v", s3.peers["global"])
		}
		peers, err = s3.numPeers()
		if err != nil {
			return false, fmt.Errorf("numPeers() failed: %v", err)
		}
		if peers != 2 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestNomad_BootstrapExpect asserts that a cluster of bootstrap_expect
// servers elects a leader once quorum forms, and that a later join does
// not trigger a new election.
func TestNomad_BootstrapExpect(t *testing.T) {
	t.Parallel()

	dir := tmpDir(t)
	defer os.RemoveAll(dir)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
	})
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
	})
	defer cleanupS2()
	s3, cleanupS3 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node3")
	})
	defer cleanupS3()
	TestJoin(t, s1, s2, s3)

	testutil.WaitForResult(func() (bool, error) {
		// Retry the join to decrease flakiness
		TestJoin(t, s1, s2, s3)
		peers, err := s1.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 3 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		peers, err = s2.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 3 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		peers, err = s3.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 3 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		if len(s1.localPeers) != 3 {
			return false, fmt.Errorf("bad: %#v", s1.localPeers)
		}
		if len(s2.localPeers) != 3 {
			return false, fmt.Errorf("bad: %#v", s2.localPeers)
		}
		if len(s3.localPeers) != 3 {
			return false, fmt.Errorf("bad: %#v", s3.localPeers)
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})

	// Join a fourth server after quorum has already been formed and ensure
	// there is no election
	s4, cleanupS4 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevMode = false
		c.DataDir = path.Join(dir, "node4")
	})
	defer cleanupS4()

	// Make sure a leader is elected, grab the current term and then add in
	// the fourth server.
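	// last_log_term reflects the term of the newest Raft log entry; a fresh
	// election would increase it.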
	testutil.WaitForLeader(t, s1.RPC)
	termBefore := s1.raft.Stats()["last_log_term"]

	var addresses []string
	for _, s := range []*Server{s1, s2, s3} {
		addr := fmt.Sprintf("127.0.0.1:%d", s.config.SerfConfig.MemberlistConfig.BindPort)
		addresses = append(addresses, addr)
	}
	if _, err := s4.Join(addresses); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Wait for the new server to see itself added to the cluster.
	var p4 int
	testutil.WaitForResult(func() (bool, error) {
		// Retry join to reduce flakiness
		if _, err := s4.Join(addresses); err != nil {
			t.Fatalf("err: %v", err)
		}
		p4, _ = s4.numPeers()
		return p4 == 4, fmt.Errorf("%d", p4)
	}, func(err error) {
		t.Fatalf("should have 4 peers: %v", err)
	})

	// Make sure there's still a leader and that the term didn't change,
	// so we know an election didn't occur.
	testutil.WaitForLeader(t, s1.RPC)
	termAfter := s1.raft.Stats()["last_log_term"]
	if termAfter != termBefore {
		t.Fatalf("looks like an election took place")
	}
}

// TestNomad_BootstrapExpect_NonVoter asserts that non-voting servers do not
// count toward bootstrap_expect, so bootstrap waits until enough voters join.
func TestNomad_BootstrapExpect_NonVoter(t *testing.T) {
	t.Parallel()

	dir := tmpDir(t)
	defer os.RemoveAll(dir)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node1")
		c.NonVoter = true
	})
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node2")
		c.NonVoter = true
	})
	defer cleanupS2()
	s3, cleanupS3 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node3")
	})
	defer cleanupS3()
	TestJoin(t, s1, s2, s3)

	// Assert that we do not bootstrap: only one of the three servers is a
	// voter, which is short of the expected two.
	testutil.AssertUntil(testutil.Timeout(time.Second), func() (bool, error) {
		_, p := s1.getLeader()
		if p != nil {
			return false, fmt.Errorf("leader %v", p)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("should not have leader: %v", err)
	})

	// Add the fourth server that is a voter
	s4, cleanupS4 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
		c.DevMode = false
		c.DataDir = path.Join(dir, "node4")
	})
	defer cleanupS4()
	TestJoin(t, s1, s2, s3, s4)

	testutil.WaitForResult(func() (bool, error) {
		// Retry the join to decrease flakiness
		TestJoin(t, s1, s2, s3, s4)
		peers, err := s1.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 4 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		peers, err = s2.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 4 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		peers, err = s3.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 4 {
			return false, fmt.Errorf("bad: %#v", peers)
		}
		peers, err = s4.numPeers()
		if err != nil {
			return false, err
		}
		if peers != 4 {
			return false, fmt.Errorf("bad: %#v", peers)
		}

		if len(s1.localPeers) != 4 {
			return false, fmt.Errorf("bad: %#v", s1.localPeers)
		}
		if len(s2.localPeers) != 4 {
			return false, fmt.Errorf("bad: %#v", s2.localPeers)
		}
		if len(s3.localPeers) != 4 {
			return false, fmt.Errorf("bad: %#v", s3.localPeers)
		}
		if len(s4.localPeers) != 4 {
			return false, fmt.Errorf("bad: %#v", s4.localPeers)
		}

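		// With s3 and s4 both voting, the expected two voters are present,
		// so bootstrap should have fired and elected a leader.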
		_, p := s1.getLeader()
		if p == nil {
			return false, fmt.Errorf("no leader")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

// TestNomad_BadExpect asserts that servers advertising conflicting
// bootstrap_expect values never bootstrap.
func TestNomad_BadExpect(t *testing.T) {
	t.Parallel()

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 2
	})
	defer cleanupS1()
	s2, cleanupS2 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
	})
	defer cleanupS2()
	servers := []*Server{s1, s2}
	TestJoin(t, s1, s2)

	// Serf members should update
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			members := s.Members()
			if len(members) != 2 {
				return false, fmt.Errorf("%d", len(members))
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("should have 2 peers: %v", err)
	})

	// Should still have no peers because the two servers disagree on
	// bootstrap_expect (2 vs. 3).
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			p, _ := s.numPeers()
			if p != 0 {
				return false, fmt.Errorf("%d", p)
			}
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("should have 0 peers: %v", err)
	})
}

// TestNomad_NonBootstrapping_ShouldntBootstrap asserts that if
// BootstrapExpect is zero, the server shouldn't bootstrap.
func TestNomad_NonBootstrapping_ShouldntBootstrap(t *testing.T) {
	t.Parallel()

	dir := tmpDir(t)
	defer os.RemoveAll(dir)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 0
		c.DevMode = false
		c.DataDir = path.Join(dir, "node")
	})
	defer cleanupS1()

	testutil.WaitForResult(func() (bool, error) {
		s1.peerLock.Lock()
		p := len(s1.localPeers)
		s1.peerLock.Unlock()
		if p != 1 {
			return false, fmt.Errorf("%d", p)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("expected 1 local peer: %v", err)
	})

	// As non-bootstrap mode is the initial state, we must wait long enough
	// to assert that we don't bootstrap even after time has elapsed. Also,
	// explicitly attempt to bootstrap.
	s1.maybeBootstrap()
	time.Sleep(100 * time.Millisecond)

	bootstrapped := atomic.LoadInt32(&s1.config.Bootstrapped)
	require.Zero(t, bootstrapped, "expecting non-bootstrapped servers")

	p, _ := s1.numPeers()
	require.Zero(t, p, "number of peers in Raft")
}