// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"errors"
	"fmt"
	"time"

	jc "github.com/juju/testing/checkers"
	"github.com/juju/utils/voyeur"
	gc "launchpad.net/gocheck"

	"github.com/juju/juju/instance"
	"github.com/juju/juju/juju/testing"
	"github.com/juju/juju/network"
	statetesting "github.com/juju/juju/state/testing"
	coretesting "github.com/juju/juju/testing"
	"github.com/juju/juju/worker"
)

type workerJujuConnSuite struct {
	testing.JujuConnSuite
}

var _ = gc.Suite(&workerJujuConnSuite{})

func (s *workerJujuConnSuite) TestStartStop(c *gc.C) {
	w, err := New(s.State)
	c.Assert(err, gc.IsNil)
	err = worker.Stop(w)
	c.Assert(err, gc.IsNil)
}

func (s *workerJujuConnSuite) TestPublisherSetsAPIHostPorts(c *gc.C) {
	st := newFakeState()
	initState(c, st, 3)

	watcher := s.State.WatchAPIHostPorts()
	cwatch := statetesting.NewNotifyWatcherC(c, s.State, watcher)
	cwatch.AssertOneChange()

	statePublish := newPublisher(s.State)

	// Wrap the publisher so that we can call StartSync immediately
	// after the publishAPIServers method is called.
	publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
		err := statePublish.publishAPIServers(apiServers, instanceIds)
		s.State.StartSync()
		return err
	}

	w := newWorker(st, publisherFunc(publish))
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()

	cwatch.AssertOneChange()
	hps, err := s.State.APIHostPorts()
	c.Assert(err, gc.IsNil)
	assertAPIHostPorts(c, hps, expectedAPIHostPorts(3))
}

type workerSuite struct {
	coretesting.BaseSuite
}

var _ = gc.Suite(&workerSuite{})

func (s *workerSuite) SetUpTest(c *gc.C) {
	s.BaseSuite.SetUpTest(c)
	resetErrors()
}

// initState initializes the fake state with a single
// replicaset member and numMachines machines
// primed to vote.
func initState(c *gc.C, st *fakeState, numMachines int) {
	var ids []string
	for i := 10; i < 10+numMachines; i++ {
		id := fmt.Sprint(i)
		m := st.addMachine(id, true)
		m.setInstanceId(instance.Id("id-" + id))
		m.setStateHostPort(fmt.Sprintf("0.1.2.%d:%d", i, mongoPort))
		ids = append(ids, id)
		c.Assert(m.MongoHostPorts(), gc.HasLen, 1)

		m.setAPIHostPorts(addressesWithPort(apiPort, fmt.Sprintf("0.1.2.%d", i)))
	}
	st.machine("10").SetHasVote(true)
	st.setStateServers(ids...)
	st.session.Set(mkMembers("0v"))
	st.session.setStatus(mkStatuses("0p"))
	st.check = checkInvariants
}
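// A note on the mkMembers/mkStatuses shorthand used by initState above
// and by the tests below: both helpers are defined elsewhere in this
// package. Judging from how the tests use them, each space-separated
// token names a member by index; in mkMembers a "v" suffix appears to
// mark a voting member (so "0v 1 2" is one voting and two non-voting
// members), and in mkStatuses the suffix appears to denote replica-set
// state ("p" primary, "s" secondary, "H" an unhealthy/down member).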
// expectedAPIHostPorts returns the expected addresses
// of the machines as created by initState.
func expectedAPIHostPorts(n int) [][]network.HostPort {
	servers := make([][]network.HostPort, n)
	for i := range servers {
		servers[i] = []network.HostPort{{
			Address: network.NewAddress(fmt.Sprintf("0.1.2.%d", i+10), network.ScopeUnknown),
			Port:    apiPort,
		}}
	}
	return servers
}

func addressesWithPort(port int, addrs ...string) []network.HostPort {
	return network.AddressesWithPort(network.NewAddresses(addrs...), port)
}

func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) {
	s.PatchValue(&pollInterval, 5*time.Millisecond)

	st := newFakeState()
	initState(c, st, 3)

	memberWatcher := st.session.members.Watch()
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v"))

	logger.Infof("starting worker")
	w := newWorker(st, noPublisher{})
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()

	// Wait for the worker to set the initial members.
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2"))

	// Update the status of the new members
	// and check that they become voting.
	c.Logf("updating new member status")
	st.session.setStatus(mkStatuses("0p 1s 2s"))
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v"))

	// Add another machine.
	c.Logf("adding another machine")
	m13 := st.addMachine("13", false)
	m13.setStateHostPort(fmt.Sprintf("0.1.2.%d:%d", 13, mongoPort))
	st.setStateServers("10", "11", "12", "13")

	c.Logf("waiting for new member to be added")
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3"))

	// Remove the vote from an existing member
	// and give it to the new machine. Also set
	// the status of the new machine to healthy.
	c.Logf("removing vote from machine 10 and adding it to machine 13")
	st.machine("10").setWantsVote(false)
	st.machine("13").setWantsVote(true)

	st.session.setStatus(mkStatuses("0p 1s 2s 3s"))

	// Check that the new machine gets the vote and the
	// old machine loses it.
	c.Logf("waiting for vote switch")
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v"))

	// Remove the old machine.
	c.Logf("removing old machine")
	st.removeMachine("10")
	st.setStateServers("11", "12", "13")

	// Check that it's removed from the members.
	c.Logf("waiting for removal")
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v"))
}
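// A note on error injection: setErrorFor and setErrorFuncFor are
// helpers defined alongside the fake state elsewhere in this package.
// Judging from their use below, they register an error (or an
// error-producing function) against a method-call description such as
// "Machine.SetHasVote 11 true", with "*" appearing to act as a
// wildcard argument, and resetErrors clears all registered errors
// between tests.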
func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) {
	st := newFakeState()

	// Simulate a state where we have four state servers,
	// one has gone down, and we're replacing it:
	// 0 - hasvote true, wantsvote false, down
	// 1 - hasvote true, wantsvote true
	// 2 - hasvote true, wantsvote true
	// 3 - hasvote false, wantsvote true
	//
	// When it starts, the worker should move the vote from
	// 0 to 3. We'll arrange things so that it will succeed in
	// setting the membership but fail setting the HasVote
	// to false.
	initState(c, st, 4)
	st.machine("10").SetHasVote(true)
	st.machine("11").SetHasVote(true)
	st.machine("12").SetHasVote(true)
	st.machine("13").SetHasVote(false)

	st.machine("10").setWantsVote(false)
	st.machine("11").setWantsVote(true)
	st.machine("12").setWantsVote(true)
	st.machine("13").setWantsVote(true)

	st.session.Set(mkMembers("0v 1v 2v 3"))
	st.session.setStatus(mkStatuses("0H 1p 2s 3s"))

	// Make the worker fail to set HasVote to false
	// after changing the replica set membership.
	setErrorFor("Machine.SetHasVote * false", errors.New("frood"))

	memberWatcher := st.session.members.Watch()
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3"))

	w := newWorker(st, noPublisher{})
	done := make(chan error)
	go func() {
		done <- w.Wait()
	}()

	// Wait for the worker to set the initial members.
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v"))

	// The worker should encounter an error setting the
	// has-vote status to false and exit.
	select {
	case err := <-done:
		c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for worker to exit")
	}

	// Start the worker again - although the membership should
	// not change, the HasVote status should be updated correctly.
	resetErrors()
	w = newWorker(st, noPublisher{})

	// Watch all the machines for changes, so we can check
	// their has-vote status without polling.
	changed := make(chan struct{}, 1)
	for i := 10; i < 14; i++ {
		watcher := st.machine(fmt.Sprint(i)).val.Watch()
		defer watcher.Close()
		go func() {
			for watcher.Next() {
				select {
				case changed <- struct{}{}:
				default:
				}
			}
		}()
	}
	timeout := time.After(coretesting.LongWait)
loop:
	for {
		select {
		case <-changed:
			correct := true
			for i := 10; i < 14; i++ {
				hasVote := st.machine(fmt.Sprint(i)).HasVote()
				expectHasVote := i != 10
				if hasVote != expectHasVote {
					correct = false
				}
			}
			if correct {
				break loop
			}
		case <-timeout:
			c.Fatalf("timed out waiting for vote to be set")
		}
	}
}
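// TestAddressChange checks that when the mongo address of a state
// server machine changes, the worker updates the address of the
// corresponding replica-set member.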
func (s *workerSuite) TestAddressChange(c *gc.C) {
	st := newFakeState()
	initState(c, st, 3)

	memberWatcher := st.session.members.Watch()
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v"))

	logger.Infof("starting worker")
	w := newWorker(st, noPublisher{})
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()

	// Wait for the worker to set the initial members.
	mustNext(c, memberWatcher)
	assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2"))

	// Change an address and wait for it to be changed in the
	// members.
	st.machine("11").setStateHostPort("0.1.99.99:9876")

	mustNext(c, memberWatcher)
	expectMembers := mkMembers("0v 1 2")
	expectMembers[1].Address = "0.1.99.99:9876"
	assertMembers(c, memberWatcher.Value(), expectMembers)
}

var fatalErrorsTests = []struct {
	errPattern string
	err        error
	expectErr  string
}{{
	errPattern: "State.StateServerInfo",
	expectErr:  "cannot get state server info: sample",
}, {
	errPattern: "Machine.SetHasVote 11 true",
	expectErr:  `cannot set voting status of "11" to true: sample`,
}, {
	errPattern: "Session.CurrentStatus",
	expectErr:  "cannot get replica set status: sample",
}, {
	errPattern: "Session.CurrentMembers",
	expectErr:  "cannot get replica set members: sample",
}, {
	errPattern: "State.Machine *",
	expectErr:  `cannot get machine "10": sample`,
}, {
	errPattern: "Machine.InstanceId *",
	expectErr:  `cannot get API server info: sample`,
}}

func (s *workerSuite) TestFatalErrors(c *gc.C) {
	s.PatchValue(&pollInterval, 5*time.Millisecond)
	for i, test := range fatalErrorsTests {
		c.Logf("test %d: %s -> %s", i, test.errPattern, test.expectErr)
		resetErrors()
		st := newFakeState()
		st.session.InstantlyReady = true
		initState(c, st, 3)
		setErrorFor(test.errPattern, errors.New("sample"))
		w := newWorker(st, noPublisher{})
		done := make(chan error)
		go func() {
			done <- w.Wait()
		}()
		select {
		case err := <-done:
			c.Assert(err, gc.ErrorMatches, test.expectErr)
		case <-time.After(coretesting.LongWait):
			c.Fatalf("timed out waiting for error")
		}
	}
}
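// TestSetMembersErrorIsNotFatal checks that an error from Session.Set
// does not kill the worker: it keeps retrying, doubling its retry
// interval from initialRetryInterval up to a cap of maxRetryInterval,
// so the number of retries observed in any fixed window is bounded.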
func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) {
	st := newFakeState()
	initState(c, st, 3)
	st.session.setStatus(mkStatuses("0p 1s 2s"))
	var isSet voyeur.Value
	count := 0
	setErrorFuncFor("Session.Set", func() error {
		isSet.Set(count)
		count++
		return errors.New("sample")
	})
	s.PatchValue(&initialRetryInterval, 10*time.Microsecond)
	s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4)

	expectedIterations := 0
	for d := initialRetryInterval; d < maxRetryInterval*2; d *= 2 {
		expectedIterations++
	}

	w := newWorker(st, noPublisher{})
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()
	isSetWatcher := isSet.Watch()

	n0 := mustNext(c, isSetWatcher).(int)
	time.Sleep(maxRetryInterval * 2)
	n1 := mustNext(c, isSetWatcher).(int)

	// The worker should have backed off exponentially...
	c.Assert(n1-n0, jc.LessThan, expectedIterations+1)
	c.Logf("actual iterations %d; expected iterations %d", n1-n0, expectedIterations)

	// ... but only up to the maximum retry interval.
	n0 = mustNext(c, isSetWatcher).(int)
	time.Sleep(maxRetryInterval * 2)
	n1 = mustNext(c, isSetWatcher).(int)

	c.Assert(n1-n0, jc.LessThan, 3)
}

// publisherFunc adapts an ordinary function to the publisher
// interface expected by newWorker.
type publisherFunc func(apiServers [][]network.HostPort, instanceIds []instance.Id) error

func (f publisherFunc) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
	return f(apiServers, instanceIds)
}

func (s *workerSuite) TestStateServersArePublished(c *gc.C) {
	publishCh := make(chan [][]network.HostPort)
	publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
		publishCh <- apiServers
		return nil
	}

	st := newFakeState()
	initState(c, st, 3)
	w := newWorker(st, publisherFunc(publish))
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()
	select {
	case servers := <-publishCh:
		assertAPIHostPorts(c, servers, expectedAPIHostPorts(3))
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for publish")
	}

	// Change one of the servers' API addresses and check that it's published.
	newMachine10APIHostPorts := addressesWithPort(apiPort, "0.2.8.124")
	st.machine("10").setAPIHostPorts(newMachine10APIHostPorts)
	select {
	case servers := <-publishCh:
		expected := expectedAPIHostPorts(3)
		expected[0] = newMachine10APIHostPorts
		assertAPIHostPorts(c, servers, expected)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for publish")
	}
}

func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) {
	s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
	s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
	s.PatchValue(&maxRetryInterval, initialRetryInterval)

	publishCh := make(chan [][]network.HostPort, 100)

	count := 0
	publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
		publishCh <- apiServers
		count++
		if count <= 3 {
			return fmt.Errorf("publish error")
		}
		return nil
	}
	st := newFakeState()
	initState(c, st, 3)

	w := newWorker(st, publisherFunc(publish))
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()

	for i := 0; i < 4; i++ {
		select {
		case servers := <-publishCh:
			assertAPIHostPorts(c, servers, expectedAPIHostPorts(3))
		case <-time.After(coretesting.LongWait):
			c.Fatalf("timed out waiting for publish #%d", i)
		}
	}
	select {
	case <-publishCh:
		c.Errorf("unexpected publish event")
	case <-time.After(coretesting.ShortWait):
	}
}

func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) {
	s.PatchValue(&pollInterval, coretesting.LongWait+time.Second)
	s.PatchValue(&initialRetryInterval, 5*time.Millisecond)
	s.PatchValue(&maxRetryInterval, initialRetryInterval)

	publishCh := make(chan []instance.Id, 100)

	publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
		publishCh <- instanceIds
		return nil
	}
	st := newFakeState()
	initState(c, st, 3)

	w := newWorker(st, publisherFunc(publish))
	defer func() {
		c.Check(worker.Stop(w), gc.IsNil)
	}()

	select {
	case instanceIds := <-publishCh:
		c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"})
	case <-time.After(coretesting.LongWait):
		c.Errorf("timed out waiting for publish")
	}
}
// mustNext waits for w's value to be set and returns it.
func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) {
	done := make(chan bool)
	go func() {
		c.Logf("mustNext %p", w)
		ok := w.Next()
		val = w.Value()
		c.Logf("mustNext done %p, ok %v", w, ok)
		done <- ok
	}()
	select {
	case ok := <-done:
		c.Assert(ok, jc.IsTrue)
		return
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for value to be set")
	}
	panic("unreachable")
}

// noPublisher implements the publisher interface
// by discarding everything it is given.
type noPublisher struct{}

func (noPublisher) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error {
	return nil
}