github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/peergrouper/worker_test.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package peergrouper 5 6 import ( 7 "errors" 8 "fmt" 9 "time" 10 11 jc "github.com/juju/testing/checkers" 12 "github.com/juju/utils/voyeur" 13 gc "gopkg.in/check.v1" 14 15 "github.com/juju/juju/instance" 16 "github.com/juju/juju/network" 17 coretesting "github.com/juju/juju/testing" 18 "github.com/juju/juju/worker" 19 ) 20 21 type TestIPVersion struct { 22 version string 23 formatHostPort string 24 formatHost string 25 machineFormatHost string 26 extraHostPort string 27 extraHost string 28 extraAddress string 29 addressType network.AddressType 30 } 31 32 var ( 33 testIPv4 = TestIPVersion{ 34 version: "IPv4", 35 formatHostPort: "0.1.2.%d:%d", 36 formatHost: "0.1.2.%d", 37 machineFormatHost: "0.1.2.%d", 38 extraHostPort: "0.1.99.99:9876", 39 extraHost: "0.1.99.13", 40 extraAddress: "0.1.99.13:1234", 41 addressType: network.IPv4Address, 42 } 43 testIPv6 = TestIPVersion{ 44 version: "IPv6", 45 formatHostPort: "[2001:DB8::%d]:%d", 46 formatHost: "[2001:DB8::%d]", 47 machineFormatHost: "2001:DB8::%d", 48 extraHostPort: "[2001:DB8::99:99]:9876", 49 extraHost: "2001:DB8::99:13", 50 extraAddress: "[2001:DB8::99:13]:1234", 51 addressType: network.IPv6Address, 52 } 53 ) 54 55 // DoTestForIPv4AndIPv6 runs the passed test for IPv4 and IPv6. 56 func DoTestForIPv4AndIPv6(t func(ipVersion TestIPVersion)) { 57 t(testIPv4) 58 t(testIPv6) 59 } 60 61 type workerSuite struct { 62 coretesting.BaseSuite 63 } 64 65 var _ = gc.Suite(&workerSuite{}) 66 67 func (s *workerSuite) SetUpTest(c *gc.C) { 68 s.BaseSuite.SetUpTest(c) 69 resetErrors() 70 } 71 72 // InitState initializes the fake state with a single 73 // replicaset member and numMachines machines 74 // primed to vote. 75 func InitState(c *gc.C, st *fakeState, numMachines int, ipVersion TestIPVersion) { 76 var ids []string 77 for i := 10; i < 10+numMachines; i++ { 78 id := fmt.Sprint(i) 79 m := st.addMachine(id, true) 80 m.setInstanceId(instance.Id("id-" + id)) 81 m.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, i, mongoPort)) 82 ids = append(ids, id) 83 c.Assert(m.MongoHostPorts(), gc.HasLen, 1) 84 85 m.setAPIHostPorts(addressesWithPort(apiPort, fmt.Sprintf(ipVersion.formatHost, i))) 86 } 87 st.machine("10").SetHasVote(true) 88 st.setStateServers(ids...) 89 st.session.Set(mkMembers("0v", ipVersion)) 90 st.session.setStatus(mkStatuses("0p", ipVersion)) 91 st.check = checkInvariants 92 } 93 94 // ExpectedAPIHostPorts returns the expected addresses 95 // of the machines as created by InitState. 96 func ExpectedAPIHostPorts(n int, ipVersion TestIPVersion) [][]network.HostPort { 97 servers := make([][]network.HostPort, n) 98 for i := range servers { 99 servers[i] = []network.HostPort{{ 100 Address: network.NewAddress(fmt.Sprintf(ipVersion.formatHost, i+10), network.ScopeUnknown), 101 Port: apiPort, 102 }} 103 } 104 return servers 105 } 106 107 func addressesWithPort(port int, addrs ...string) []network.HostPort { 108 return network.AddressesWithPort(network.NewAddresses(addrs...), port) 109 } 110 111 func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) { 112 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 113 s.PatchValue(&pollInterval, 5*time.Millisecond) 114 115 st := NewFakeState() 116 InitState(c, st, 3, ipVersion) 117 118 memberWatcher := st.session.members.Watch() 119 mustNext(c, memberWatcher) 120 assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion)) 121 122 logger.Infof("starting worker") 123 w := newWorker(st, noPublisher{}) 124 defer func() { 125 c.Check(worker.Stop(w), gc.IsNil) 126 }() 127 128 // Wait for the worker to set the initial members. 129 mustNext(c, memberWatcher) 130 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion)) 131 132 // Update the status of the new members 133 // and check that they become voting. 134 c.Logf("updating new member status") 135 st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion)) 136 mustNext(c, memberWatcher) 137 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v", ipVersion)) 138 139 c.Logf("adding another machine") 140 // Add another machine. 141 m13 := st.addMachine("13", false) 142 m13.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, 13, mongoPort)) 143 st.setStateServers("10", "11", "12", "13") 144 145 c.Logf("waiting for new member to be added") 146 mustNext(c, memberWatcher) 147 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion)) 148 149 // Remove vote from an existing member; 150 // and give it to the new machine. 151 // Also set the status of the new machine to 152 // healthy. 153 c.Logf("removing vote from machine 10 and adding it to machine 13") 154 st.machine("10").setWantsVote(false) 155 st.machine("13").setWantsVote(true) 156 157 st.session.setStatus(mkStatuses("0p 1s 2s 3s", ipVersion)) 158 159 // Check that the new machine gets the vote and the 160 // old machine loses it. 161 c.Logf("waiting for vote switch") 162 mustNext(c, memberWatcher) 163 assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion)) 164 165 c.Logf("removing old machine") 166 // Remove the old machine. 167 st.removeMachine("10") 168 st.setStateServers("11", "12", "13") 169 170 // Check that it's removed from the members. 171 c.Logf("waiting for removal") 172 mustNext(c, memberWatcher) 173 assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v", ipVersion)) 174 }) 175 } 176 177 func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) { 178 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 179 st := NewFakeState() 180 181 // Simulate a state where we have four state servers, 182 // one has gone down, and we're replacing it: 183 // 0 - hasvote true, wantsvote false, down 184 // 1 - hasvote true, wantsvote true 185 // 2 - hasvote true, wantsvote true 186 // 3 - hasvote false, wantsvote true 187 // 188 // When it starts, the worker should move the vote from 189 // 0 to 3. We'll arrange things so that it will succeed in 190 // setting the membership but fail setting the HasVote 191 // to false. 192 InitState(c, st, 4, ipVersion) 193 st.machine("10").SetHasVote(true) 194 st.machine("11").SetHasVote(true) 195 st.machine("12").SetHasVote(true) 196 st.machine("13").SetHasVote(false) 197 198 st.machine("10").setWantsVote(false) 199 st.machine("11").setWantsVote(true) 200 st.machine("12").setWantsVote(true) 201 st.machine("13").setWantsVote(true) 202 203 st.session.Set(mkMembers("0v 1v 2v 3", ipVersion)) 204 st.session.setStatus(mkStatuses("0H 1p 2s 3s", ipVersion)) 205 206 // Make the worker fail to set HasVote to false 207 // after changing the replica set membership. 208 setErrorFor("Machine.SetHasVote * false", errors.New("frood")) 209 210 memberWatcher := st.session.members.Watch() 211 mustNext(c, memberWatcher) 212 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion)) 213 214 w := newWorker(st, noPublisher{}) 215 done := make(chan error) 216 go func() { 217 done <- w.Wait() 218 }() 219 220 // Wait for the worker to set the initial members. 221 mustNext(c, memberWatcher) 222 assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion)) 223 224 // The worker should encounter an error setting the 225 // has-vote status to false and exit. 226 select { 227 case err := <-done: 228 c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`) 229 case <-time.After(coretesting.LongWait): 230 c.Fatalf("timed out waiting for worker to exit") 231 } 232 233 // Start the worker again - although the membership should 234 // not change, the HasVote status should be updated correctly. 235 resetErrors() 236 w = newWorker(st, noPublisher{}) 237 238 // Watch all the machines for changes, so we can check 239 // their has-vote status without polling. 240 changed := make(chan struct{}, 1) 241 for i := 10; i < 14; i++ { 242 watcher := st.machine(fmt.Sprint(i)).val.Watch() 243 defer watcher.Close() 244 go func() { 245 for watcher.Next() { 246 select { 247 case changed <- struct{}{}: 248 default: 249 } 250 } 251 }() 252 } 253 timeout := time.After(coretesting.LongWait) 254 loop: 255 for { 256 select { 257 case <-changed: 258 correct := true 259 for i := 10; i < 14; i++ { 260 hasVote := st.machine(fmt.Sprint(i)).HasVote() 261 expectHasVote := i != 10 262 if hasVote != expectHasVote { 263 correct = false 264 } 265 } 266 if correct { 267 break loop 268 } 269 case <-timeout: 270 c.Fatalf("timed out waiting for vote to be set") 271 } 272 } 273 }) 274 } 275 276 func (s *workerSuite) TestAddressChange(c *gc.C) { 277 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 278 st := NewFakeState() 279 InitState(c, st, 3, ipVersion) 280 281 memberWatcher := st.session.members.Watch() 282 mustNext(c, memberWatcher) 283 assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion)) 284 285 logger.Infof("starting worker") 286 w := newWorker(st, noPublisher{}) 287 defer func() { 288 c.Check(worker.Stop(w), gc.IsNil) 289 }() 290 291 // Wait for the worker to set the initial members. 292 mustNext(c, memberWatcher) 293 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion)) 294 295 // Change an address and wait for it to be changed in the 296 // members. 297 st.machine("11").setStateHostPort(ipVersion.extraHostPort) 298 299 mustNext(c, memberWatcher) 300 expectMembers := mkMembers("0v 1 2", ipVersion) 301 expectMembers[1].Address = ipVersion.extraHostPort 302 assertMembers(c, memberWatcher.Value(), expectMembers) 303 resetErrors() 304 }) 305 } 306 307 var fatalErrorsTests = []struct { 308 errPattern string 309 err error 310 expectErr string 311 }{{ 312 errPattern: "State.StateServerInfo", 313 expectErr: "cannot get state server info: sample", 314 }, { 315 errPattern: "Machine.SetHasVote 11 true", 316 expectErr: `cannot set voting status of "11" to true: sample`, 317 }, { 318 errPattern: "Session.CurrentStatus", 319 expectErr: "cannot get replica set status: sample", 320 }, { 321 errPattern: "Session.CurrentMembers", 322 expectErr: "cannot get replica set members: sample", 323 }, { 324 errPattern: "State.Machine *", 325 expectErr: `cannot get machine "10": sample`, 326 }, { 327 errPattern: "Machine.InstanceId *", 328 expectErr: `cannot get API server info: sample`, 329 }} 330 331 func (s *workerSuite) TestFatalErrors(c *gc.C) { 332 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 333 s.PatchValue(&pollInterval, 5*time.Millisecond) 334 for i, testCase := range fatalErrorsTests { 335 c.Logf("test %d: %s -> %s", i, testCase.errPattern, testCase.expectErr) 336 resetErrors() 337 st := NewFakeState() 338 st.session.InstantlyReady = true 339 InitState(c, st, 3, ipVersion) 340 setErrorFor(testCase.errPattern, errors.New("sample")) 341 w := newWorker(st, noPublisher{}) 342 done := make(chan error) 343 go func() { 344 done <- w.Wait() 345 }() 346 select { 347 case err := <-done: 348 c.Assert(err, gc.ErrorMatches, testCase.expectErr) 349 case <-time.After(coretesting.LongWait): 350 c.Fatalf("timed out waiting for error") 351 } 352 } 353 }) 354 } 355 356 func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) { 357 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 358 st := NewFakeState() 359 InitState(c, st, 3, ipVersion) 360 st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion)) 361 var setCount voyeur.Value 362 setErrorFuncFor("Session.Set", func() error { 363 setCount.Set(true) 364 return errors.New("sample") 365 }) 366 s.PatchValue(&initialRetryInterval, 10*time.Microsecond) 367 s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4) 368 369 w := newWorker(st, noPublisher{}) 370 defer func() { 371 c.Check(worker.Stop(w), gc.IsNil) 372 }() 373 374 // See that the worker is retrying. 375 setCountW := setCount.Watch() 376 mustNext(c, setCountW) 377 mustNext(c, setCountW) 378 mustNext(c, setCountW) 379 380 resetErrors() 381 }) 382 } 383 384 type PublisherFunc func(apiServers [][]network.HostPort, instanceIds []instance.Id) error 385 386 func (f PublisherFunc) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 387 return f(apiServers, instanceIds) 388 } 389 390 func (s *workerSuite) TestStateServersArePublished(c *gc.C) { 391 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 392 publishCh := make(chan [][]network.HostPort) 393 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 394 publishCh <- apiServers 395 return nil 396 } 397 398 st := NewFakeState() 399 InitState(c, st, 3, ipVersion) 400 w := newWorker(st, PublisherFunc(publish)) 401 defer func() { 402 c.Check(worker.Stop(w), gc.IsNil) 403 }() 404 select { 405 case servers := <-publishCh: 406 AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion)) 407 case <-time.After(coretesting.LongWait): 408 c.Fatalf("timed out waiting for publish") 409 } 410 411 // Change one of the servers' API addresses and check that it's published. 412 var newMachine10APIHostPorts []network.HostPort 413 newMachine10APIHostPorts = addressesWithPort(apiPort, ipVersion.extraHostPort) 414 st.machine("10").setAPIHostPorts(newMachine10APIHostPorts) 415 select { 416 case servers := <-publishCh: 417 expected := ExpectedAPIHostPorts(3, ipVersion) 418 expected[0] = newMachine10APIHostPorts 419 AssertAPIHostPorts(c, servers, expected) 420 case <-time.After(coretesting.LongWait): 421 c.Fatalf("timed out waiting for publish") 422 } 423 }) 424 } 425 426 func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) { 427 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 428 s.PatchValue(&pollInterval, coretesting.LongWait+time.Second) 429 s.PatchValue(&initialRetryInterval, 5*time.Millisecond) 430 s.PatchValue(&maxRetryInterval, initialRetryInterval) 431 432 publishCh := make(chan [][]network.HostPort, 100) 433 434 count := 0 435 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 436 publishCh <- apiServers 437 count++ 438 if count <= 3 { 439 return fmt.Errorf("publish error") 440 } 441 return nil 442 } 443 st := NewFakeState() 444 InitState(c, st, 3, ipVersion) 445 446 w := newWorker(st, PublisherFunc(publish)) 447 defer func() { 448 c.Check(worker.Stop(w), gc.IsNil) 449 }() 450 451 for i := 0; i < 4; i++ { 452 select { 453 case servers := <-publishCh: 454 AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion)) 455 case <-time.After(coretesting.LongWait): 456 c.Fatalf("timed out waiting for publish #%d", i) 457 } 458 } 459 select { 460 case <-publishCh: 461 c.Errorf("unexpected publish event") 462 case <-time.After(coretesting.ShortWait): 463 } 464 }) 465 } 466 467 func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) { 468 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 469 s.PatchValue(&pollInterval, coretesting.LongWait+time.Second) 470 s.PatchValue(&initialRetryInterval, 5*time.Millisecond) 471 s.PatchValue(&maxRetryInterval, initialRetryInterval) 472 473 publishCh := make(chan []instance.Id, 100) 474 475 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 476 publishCh <- instanceIds 477 return nil 478 } 479 st := NewFakeState() 480 InitState(c, st, 3, ipVersion) 481 482 w := newWorker(st, PublisherFunc(publish)) 483 defer func() { 484 c.Check(worker.Stop(w), gc.IsNil) 485 }() 486 487 select { 488 case instanceIds := <-publishCh: 489 c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"}) 490 case <-time.After(coretesting.LongWait): 491 c.Errorf("timed out waiting for publish") 492 } 493 }) 494 } 495 496 // mustNext waits for w's value to be set and returns it. 497 func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) { 498 type voyeurResult struct { 499 ok bool 500 val interface{} 501 } 502 done := make(chan voyeurResult) 503 go func() { 504 c.Logf("mustNext %p", w) 505 ok := w.Next() 506 val = w.Value() 507 c.Logf("mustNext done %p, ok: %v, val: %#v", w, ok, val) 508 done <- voyeurResult{ok, val} 509 }() 510 select { 511 case result := <-done: 512 c.Assert(result.ok, jc.IsTrue) 513 return result.val 514 case <-time.After(coretesting.LongWait): 515 c.Fatalf("timed out waiting for value to be set") 516 } 517 panic("unreachable") 518 } 519 520 type noPublisher struct{} 521 522 func (noPublisher) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 523 return nil 524 }