github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/autopilot_test.go (about) 1 package consul 2 3 import ( 4 "os" 5 "testing" 6 "time" 7 8 "github.com/hashicorp/consul/testrpc" 9 "github.com/hashicorp/consul/testutil/retry" 10 "github.com/hashicorp/raft" 11 "github.com/hashicorp/serf/serf" 12 ) 13 14 func TestAutopilot_IdempotentShutdown(t *testing.T) { 15 dir1, s1 := testServerWithConfig(t, nil) 16 defer os.RemoveAll(dir1) 17 defer s1.Shutdown() 18 retry.Run(t, func(r *retry.R) { r.Check(waitForLeader(s1)) }) 19 20 s1.autopilot.Start() 21 s1.autopilot.Start() 22 s1.autopilot.Start() 23 s1.autopilot.Stop() 24 s1.autopilot.Stop() 25 s1.autopilot.Stop() 26 } 27 28 func TestAutopilot_CleanupDeadServer(t *testing.T) { 29 t.Parallel() 30 for i := 1; i <= 3; i++ { 31 testCleanupDeadServer(t, i) 32 } 33 } 34 35 func testCleanupDeadServer(t *testing.T, raftVersion int) { 36 dc := "dc1" 37 conf := func(c *Config) { 38 c.Datacenter = dc 39 c.Bootstrap = false 40 c.BootstrapExpect = 3 41 c.RaftConfig.ProtocolVersion = raft.ProtocolVersion(raftVersion) 42 } 43 dir1, s1 := testServerWithConfig(t, conf) 44 defer os.RemoveAll(dir1) 45 defer s1.Shutdown() 46 47 dir2, s2 := testServerWithConfig(t, conf) 48 defer os.RemoveAll(dir2) 49 defer s2.Shutdown() 50 51 dir3, s3 := testServerWithConfig(t, conf) 52 defer os.RemoveAll(dir3) 53 defer s3.Shutdown() 54 55 servers := []*Server{s1, s2, s3} 56 57 // Try to join 58 joinLAN(t, s2, s1) 59 joinLAN(t, s3, s1) 60 61 for _, s := range servers { 62 testrpc.WaitForLeader(t, s.RPC, dc) 63 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 64 } 65 66 // Bring up a new server 67 dir4, s4 := testServerWithConfig(t, conf) 68 defer os.RemoveAll(dir4) 69 defer s4.Shutdown() 70 71 // Kill a non-leader server 72 s3.Shutdown() 73 retry.Run(t, func(r *retry.R) { 74 alive := 0 75 for _, m := range s1.LANMembers() { 76 if m.Status == serf.StatusAlive { 77 alive++ 78 } 79 } 80 if alive != 2 { 81 r.Fatal(nil) 82 } 83 }) 84 85 // Join the new server 86 joinLAN(t, s4, s1) 87 servers[2] = s4 88 89 // Make sure the dead server is removed and we're back to 3 total peers 90 for _, s := range servers { 91 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 92 } 93 } 94 95 func TestAutopilot_CleanupDeadNonvoter(t *testing.T) { 96 dir1, s1 := testServer(t) 97 defer os.RemoveAll(dir1) 98 defer s1.Shutdown() 99 100 dir2, s2 := testServerDCBootstrap(t, "dc1", false) 101 defer os.RemoveAll(dir2) 102 defer s2.Shutdown() 103 104 testrpc.WaitForLeader(t, s1.RPC, "dc1") 105 106 // Have s2 join and then shut it down immediately before it gets a chance to 107 // be promoted to a voter. 108 joinLAN(t, s2, s1) 109 retry.Run(t, func(r *retry.R) { 110 r.Check(wantRaft([]*Server{s1, s2})) 111 }) 112 s2.Shutdown() 113 114 retry.Run(t, func(r *retry.R) { 115 r.Check(wantRaft([]*Server{s1})) 116 }) 117 } 118 119 func TestAutopilot_CleanupDeadServerPeriodic(t *testing.T) { 120 t.Parallel() 121 dir1, s1 := testServerWithConfig(t, func(c *Config) { 122 c.Datacenter = "dc1" 123 c.Bootstrap = true 124 }) 125 defer os.RemoveAll(dir1) 126 defer s1.Shutdown() 127 128 conf := func(c *Config) { 129 c.Datacenter = "dc1" 130 c.Bootstrap = false 131 } 132 133 dir2, s2 := testServerWithConfig(t, conf) 134 defer os.RemoveAll(dir2) 135 defer s2.Shutdown() 136 137 dir3, s3 := testServerWithConfig(t, conf) 138 defer os.RemoveAll(dir3) 139 defer s3.Shutdown() 140 141 dir4, s4 := testServerWithConfig(t, conf) 142 defer os.RemoveAll(dir4) 143 defer s4.Shutdown() 144 145 dir5, s5 := testServerWithConfig(t, conf) 146 defer os.RemoveAll(dir5) 147 defer s5.Shutdown() 148 149 servers := []*Server{s1, s2, s3, s4, s5} 150 151 // Join the servers to s1, and wait until they are all promoted to 152 // voters. 153 for _, s := range servers[1:] { 154 joinLAN(t, s, s1) 155 } 156 retry.Run(t, func(r *retry.R) { 157 r.Check(wantRaft(servers)) 158 for _, s := range servers { 159 r.Check(wantPeers(s, 5)) 160 } 161 }) 162 163 // Kill a non-leader server 164 s4.Shutdown() 165 166 // Should be removed from the peers automatically 167 servers = []*Server{s1, s2, s3, s5} 168 retry.Run(t, func(r *retry.R) { 169 r.Check(wantRaft(servers)) 170 for _, s := range servers { 171 r.Check(wantPeers(s, 4)) 172 } 173 }) 174 } 175 176 func TestAutopilot_RollingUpdate(t *testing.T) { 177 t.Parallel() 178 dir1, s1 := testServerWithConfig(t, func(c *Config) { 179 c.Datacenter = "dc1" 180 c.Bootstrap = true 181 }) 182 defer os.RemoveAll(dir1) 183 defer s1.Shutdown() 184 185 conf := func(c *Config) { 186 c.Datacenter = "dc1" 187 c.Bootstrap = false 188 } 189 190 dir2, s2 := testServerWithConfig(t, conf) 191 defer os.RemoveAll(dir2) 192 defer s2.Shutdown() 193 194 dir3, s3 := testServerWithConfig(t, conf) 195 defer os.RemoveAll(dir3) 196 defer s3.Shutdown() 197 198 // Join the servers to s1, and wait until they are all promoted to 199 // voters. 200 servers := []*Server{s1, s2, s3} 201 for _, s := range servers[1:] { 202 joinLAN(t, s, s1) 203 } 204 retry.Run(t, func(r *retry.R) { 205 r.Check(wantRaft(servers)) 206 for _, s := range servers { 207 r.Check(wantPeers(s, 3)) 208 } 209 }) 210 211 // Add one more server like we are doing a rolling update. 212 dir4, s4 := testServerWithConfig(t, conf) 213 defer os.RemoveAll(dir4) 214 defer s4.Shutdown() 215 joinLAN(t, s1, s4) 216 servers = append(servers, s4) 217 retry.Run(t, func(r *retry.R) { 218 r.Check(wantRaft(servers)) 219 for _, s := range servers { 220 r.Check(wantPeers(s, 3)) 221 } 222 }) 223 224 // Now kill one of the "old" nodes like we are doing a rolling update. 225 s3.Shutdown() 226 227 isVoter := func() bool { 228 future := s1.raft.GetConfiguration() 229 if err := future.Error(); err != nil { 230 t.Fatalf("err: %v", err) 231 } 232 for _, s := range future.Configuration().Servers { 233 if string(s.ID) == string(s4.config.NodeID) { 234 return s.Suffrage == raft.Voter 235 } 236 } 237 t.Fatalf("didn't find s4") 238 return false 239 } 240 241 // Wait for s4 to stabilize, get promoted to a voter, and for s3 to be 242 // removed. 243 servers = []*Server{s1, s2, s4} 244 retry.Run(t, func(r *retry.R) { 245 r.Check(wantRaft(servers)) 246 for _, s := range servers { 247 r.Check(wantPeers(s, 3)) 248 } 249 if !isVoter() { 250 r.Fatalf("should be a voter") 251 } 252 }) 253 } 254 255 func TestAutopilot_CleanupStaleRaftServer(t *testing.T) { 256 t.Parallel() 257 dir1, s1 := testServerDCBootstrap(t, "dc1", true) 258 defer os.RemoveAll(dir1) 259 defer s1.Shutdown() 260 261 dir2, s2 := testServerDCBootstrap(t, "dc1", false) 262 defer os.RemoveAll(dir2) 263 defer s2.Shutdown() 264 265 dir3, s3 := testServerDCBootstrap(t, "dc1", false) 266 defer os.RemoveAll(dir3) 267 defer s3.Shutdown() 268 269 dir4, s4 := testServerDCBootstrap(t, "dc1", false) 270 defer os.RemoveAll(dir4) 271 defer s4.Shutdown() 272 273 servers := []*Server{s1, s2, s3} 274 275 // Join the servers to s1 276 for _, s := range servers[1:] { 277 joinLAN(t, s, s1) 278 } 279 280 for _, s := range servers { 281 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 282 } 283 284 testrpc.WaitForLeader(t, s1.RPC, "dc1") 285 286 // Add s4 to peers directly 287 s1.raft.AddVoter(raft.ServerID(s4.config.NodeID), raft.ServerAddress(joinAddrLAN(s4)), 0, 0) 288 289 // Verify we have 4 peers 290 peers, err := s1.numPeers() 291 if err != nil { 292 t.Fatal(err) 293 } 294 if peers != 4 { 295 t.Fatalf("bad: %v", peers) 296 } 297 298 // Wait for s4 to be removed 299 for _, s := range []*Server{s1, s2, s3} { 300 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 301 } 302 } 303 304 func TestAutopilot_PromoteNonVoter(t *testing.T) { 305 t.Parallel() 306 dir1, s1 := testServerWithConfig(t, func(c *Config) { 307 c.Datacenter = "dc1" 308 c.Bootstrap = true 309 c.RaftConfig.ProtocolVersion = 3 310 c.AutopilotConfig.ServerStabilizationTime = 200 * time.Millisecond 311 c.ServerHealthInterval = 100 * time.Millisecond 312 c.AutopilotInterval = 100 * time.Millisecond 313 }) 314 defer os.RemoveAll(dir1) 315 defer s1.Shutdown() 316 codec := rpcClient(t, s1) 317 defer codec.Close() 318 testrpc.WaitForLeader(t, s1.RPC, "dc1") 319 320 dir2, s2 := testServerWithConfig(t, func(c *Config) { 321 c.Datacenter = "dc1" 322 c.Bootstrap = false 323 c.RaftConfig.ProtocolVersion = 3 324 }) 325 defer os.RemoveAll(dir2) 326 defer s2.Shutdown() 327 joinLAN(t, s2, s1) 328 329 // Make sure we see it as a nonvoter initially. We wait until half 330 // the stabilization period has passed. 331 retry.Run(t, func(r *retry.R) { 332 future := s1.raft.GetConfiguration() 333 if err := future.Error(); err != nil { 334 r.Fatal(err) 335 } 336 337 servers := future.Configuration().Servers 338 if len(servers) != 2 { 339 r.Fatalf("bad: %v", servers) 340 } 341 if servers[1].Suffrage != raft.Nonvoter { 342 r.Fatalf("bad: %v", servers) 343 } 344 health := s1.autopilot.GetServerHealth(string(servers[1].ID)) 345 if health == nil { 346 r.Fatal("nil health") 347 } 348 if !health.Healthy { 349 r.Fatalf("bad: %v", health) 350 } 351 if time.Since(health.StableSince) < s1.config.AutopilotConfig.ServerStabilizationTime/2 { 352 r.Fatal("stable period not elapsed") 353 } 354 }) 355 356 // Make sure it ends up as a voter. 357 retry.Run(t, func(r *retry.R) { 358 future := s1.raft.GetConfiguration() 359 if err := future.Error(); err != nil { 360 r.Fatal(err) 361 } 362 363 servers := future.Configuration().Servers 364 if len(servers) != 2 { 365 r.Fatalf("bad: %v", servers) 366 } 367 if servers[1].Suffrage != raft.Voter { 368 r.Fatalf("bad: %v", servers) 369 } 370 }) 371 }