github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/autopilot_test.go (about) 1 package nomad 2 3 import ( 4 "testing" 5 "time" 6 7 "fmt" 8 9 "github.com/hashicorp/consul/agent/consul/autopilot" 10 "github.com/hashicorp/consul/testutil/retry" 11 "github.com/hashicorp/nomad/testutil" 12 "github.com/hashicorp/raft" 13 "github.com/hashicorp/serf/serf" 14 ) 15 16 // wantPeers determines whether the server has the given 17 // number of voting raft peers. 18 func wantPeers(s *Server, peers int) error { 19 future := s.raft.GetConfiguration() 20 if err := future.Error(); err != nil { 21 return err 22 } 23 24 n := autopilot.NumPeers(future.Configuration()) 25 if got, want := n, peers; got != want { 26 return fmt.Errorf("got %d peers want %d", got, want) 27 } 28 return nil 29 } 30 31 // wantRaft determines if the servers have all of each other in their 32 // Raft configurations, 33 func wantRaft(servers []*Server) error { 34 // Make sure all the servers are represented in the Raft config, 35 // and that there are no extras. 36 verifyRaft := func(c raft.Configuration) error { 37 want := make(map[raft.ServerID]bool) 38 for _, s := range servers { 39 want[s.config.RaftConfig.LocalID] = true 40 } 41 42 for _, s := range c.Servers { 43 if !want[s.ID] { 44 return fmt.Errorf("don't want %q", s.ID) 45 } 46 delete(want, s.ID) 47 } 48 49 if len(want) > 0 { 50 return fmt.Errorf("didn't find %v", want) 51 } 52 return nil 53 } 54 55 for _, s := range servers { 56 future := s.raft.GetConfiguration() 57 if err := future.Error(); err != nil { 58 return err 59 } 60 if err := verifyRaft(future.Configuration()); err != nil { 61 return err 62 } 63 } 64 return nil 65 } 66 67 func TestAutopilot_CleanupDeadServer(t *testing.T) { 68 t.Parallel() 69 for i := 1; i <= 3; i++ { 70 testCleanupDeadServer(t, i) 71 } 72 } 73 74 func testCleanupDeadServer(t *testing.T, raftVersion int) { 75 conf := func(c *Config) { 76 c.DevDisableBootstrap = true 77 c.BootstrapExpect = 3 78 c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(raftVersion) 79 } 80 s1 := TestServer(t, conf) 81 defer s1.Shutdown() 82 83 s2 := TestServer(t, conf) 84 defer s2.Shutdown() 85 86 s3 := TestServer(t, conf) 87 defer s3.Shutdown() 88 89 servers := []*Server{s1, s2, s3} 90 91 // Try to join 92 TestJoin(t, s1, s2, s3) 93 94 for _, s := range servers { 95 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 96 } 97 98 // Bring up a new server 99 s4 := TestServer(t, conf) 100 defer s4.Shutdown() 101 102 // Kill a non-leader server 103 s3.Shutdown() 104 retry.Run(t, func(r *retry.R) { 105 alive := 0 106 for _, m := range s1.Members() { 107 if m.Status == serf.StatusAlive { 108 alive++ 109 } 110 } 111 if alive != 2 { 112 r.Fatal(nil) 113 } 114 }) 115 116 // Join the new server 117 TestJoin(t, s1, s4) 118 servers[2] = s4 119 120 // Make sure the dead server is removed and we're back to 3 total peers 121 for _, s := range servers { 122 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 123 } 124 } 125 126 func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) { 127 t.Parallel() 128 s1 := TestServer(t, nil) 129 defer s1.Shutdown() 130 131 conf := func(c *Config) { 132 c.DevDisableBootstrap = true 133 } 134 135 s2 := TestServer(t, conf) 136 defer s2.Shutdown() 137 138 s3 := TestServer(t, conf) 139 defer s3.Shutdown() 140 141 s4 := TestServer(t, conf) 142 defer s4.Shutdown() 143 144 s5 := TestServer(t, conf) 145 defer s5.Shutdown() 146 147 servers := []*Server{s1, s2, s3, s4, s5} 148 149 // Join the servers to s1, and wait until they are all promoted to 150 // voters. 151 TestJoin(t, s1, servers[1:]...) 152 retry.Run(t, func(r *retry.R) { 153 r.Check(wantRaft(servers)) 154 for _, s := range servers { 155 r.Check(wantPeers(s, 5)) 156 } 157 }) 158 159 // Kill a non-leader server 160 s4.Shutdown() 161 162 // Should be removed from the peers automatically 163 servers = []*Server{s1, s2, s3, s5} 164 retry.Run(t, func(r *retry.R) { 165 r.Check(wantRaft(servers)) 166 for _, s := range servers { 167 r.Check(wantPeers(s, 4)) 168 } 169 }) 170 } 171 172 func TestAutopilot_RollingUpdate(t *testing.T) { 173 t.Parallel() 174 s1 := TestServer(t, func(c *Config) { 175 c.RaftConfig.ProtocolVersion = 3 176 }) 177 defer s1.Shutdown() 178 179 conf := func(c *Config) { 180 c.DevDisableBootstrap = true 181 c.RaftConfig.ProtocolVersion = 3 182 } 183 184 s2 := TestServer(t, conf) 185 defer s2.Shutdown() 186 187 s3 := TestServer(t, conf) 188 defer s3.Shutdown() 189 190 // Join the servers to s1, and wait until they are all promoted to 191 // voters. 192 servers := []*Server{s1, s2, s3} 193 TestJoin(t, s1, s2, s3) 194 retry.Run(t, func(r *retry.R) { 195 r.Check(wantRaft(servers)) 196 for _, s := range servers { 197 r.Check(wantPeers(s, 3)) 198 } 199 }) 200 201 // Add one more server like we are doing a rolling update. 202 s4 := TestServer(t, conf) 203 defer s4.Shutdown() 204 TestJoin(t, s1, s4) 205 servers = append(servers, s4) 206 retry.Run(t, func(r *retry.R) { 207 r.Check(wantRaft(servers)) 208 for _, s := range servers { 209 r.Check(wantPeers(s, 3)) 210 } 211 }) 212 213 // Now kill one of the "old" nodes like we are doing a rolling update. 214 s3.Shutdown() 215 216 isVoter := func() bool { 217 future := s1.raft.GetConfiguration() 218 if err := future.Error(); err != nil { 219 t.Fatalf("err: %v", err) 220 } 221 for _, s := range future.Configuration().Servers { 222 if string(s.ID) == string(s4.config.NodeID) { 223 return s.Suffrage == raft.Voter 224 } 225 } 226 t.Fatalf("didn't find s4") 227 return false 228 } 229 230 // Wait for s4 to stabilize, get promoted to a voter, and for s3 to be 231 // removed. 232 servers = []*Server{s1, s2, s4} 233 retry.Run(t, func(r *retry.R) { 234 r.Check(wantRaft(servers)) 235 for _, s := range servers { 236 r.Check(wantPeers(s, 3)) 237 } 238 if !isVoter() { 239 r.Fatalf("should be a voter") 240 } 241 }) 242 } 243 244 func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { 245 t.Parallel() 246 s1 := TestServer(t, nil) 247 defer s1.Shutdown() 248 249 conf := func(c *Config) { 250 c.DevDisableBootstrap = true 251 } 252 s2 := TestServer(t, conf) 253 defer s2.Shutdown() 254 255 s3 := TestServer(t, conf) 256 defer s3.Shutdown() 257 258 s4 := TestServer(t, conf) 259 defer s4.Shutdown() 260 261 servers := []*Server{s1, s2, s3} 262 263 // Join the servers to s1 264 TestJoin(t, s1, s2, s3) 265 266 for _, s := range servers { 267 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 268 } 269 270 testutil.WaitForLeader(t, s1.RPC) 271 272 // Add s4 to peers directly 273 addr := fmt.Sprintf("127.0.0.1:%d", s4.config.RPCAddr.Port) 274 future := s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(addr), 0, 0) 275 if err := future.Error(); err != nil { 276 t.Fatal(err) 277 } 278 279 // Verify we have 4 peers 280 peers, err := s1.numPeers() 281 if err != nil { 282 t.Fatal(err) 283 } 284 if peers != 4 { 285 t.Fatalf("bad: %v", peers) 286 } 287 288 // Wait for s4 to be removed 289 for _, s := range []*Server{s1, s2, s3} { 290 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 291 } 292 } 293 294 func TestAutopilot_PromoteNonVoter(t *testing.T) { 295 t.Parallel() 296 s1 := TestServer(t, func(c *Config) { 297 c.RaftConfig.ProtocolVersion = 3 298 }) 299 defer s1.Shutdown() 300 codec := rpcClient(t, s1) 301 defer codec.Close() 302 testutil.WaitForLeader(t, s1.RPC) 303 304 s2 := TestServer(t, func(c *Config) { 305 c.DevDisableBootstrap = true 306 c.RaftConfig.ProtocolVersion = 3 307 }) 308 defer s2.Shutdown() 309 TestJoin(t, s1, s2) 310 311 // Make sure we see it as a nonvoter initially. We wait until half 312 // the stabilization period has passed. 313 retry.Run(t, func(r *retry.R) { 314 future := s1.raft.GetConfiguration() 315 if err := future.Error(); err != nil { 316 r.Fatal(err) 317 } 318 319 servers := future.Configuration().Servers 320 if len(servers) != 2 { 321 r.Fatalf("bad: %v", servers) 322 } 323 if servers[1].Suffrage != raft.Nonvoter { 324 r.Fatalf("bad: %v", servers) 325 } 326 health := s1.autopilot.GetServerHealth(string(servers[1].ID)) 327 if health == nil { 328 r.Fatalf("nil health, %v", s1.autopilot.GetClusterHealth()) 329 } 330 if !health.Healthy { 331 r.Fatalf("bad: %v", health) 332 } 333 if time.Since(health.StableSince) < s1.config.AutopilotConfig.ServerStabilizationTime/2 { 334 r.Fatal("stable period not elapsed") 335 } 336 }) 337 338 // Make sure it ends up as a voter. 339 retry.Run(t, func(r *retry.R) { 340 future := s1.raft.GetConfiguration() 341 if err := future.Error(); err != nil { 342 r.Fatal(err) 343 } 344 345 servers := future.Configuration().Servers 346 if len(servers) != 2 { 347 r.Fatalf("bad: %v", servers) 348 } 349 if servers[1].Suffrage != raft.Voter { 350 r.Fatalf("bad: %v", servers) 351 } 352 }) 353 }