github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/worker/peergrouper/worker_test.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package peergrouper 5 6 import ( 7 "errors" 8 "fmt" 9 "time" 10 11 jc "github.com/juju/testing/checkers" 12 "github.com/juju/utils/voyeur" 13 gc "gopkg.in/check.v1" 14 15 "github.com/juju/juju/instance" 16 "github.com/juju/juju/network" 17 coretesting "github.com/juju/juju/testing" 18 "github.com/juju/juju/worker" 19 ) 20 21 type TestIPVersion struct { 22 version string 23 formatHostPort string 24 formatHost string 25 machineFormatHost string 26 extraHostPort string 27 extraHost string 28 extraAddress string 29 addressType network.AddressType 30 } 31 32 var ( 33 testIPv4 = TestIPVersion{ 34 version: "IPv4", 35 formatHostPort: "0.1.2.%d:%d", 36 formatHost: "0.1.2.%d", 37 machineFormatHost: "0.1.2.%d", 38 extraHostPort: "0.1.99.99:9876", 39 extraHost: "0.1.99.13", 40 extraAddress: "0.1.99.13:1234", 41 addressType: network.IPv4Address, 42 } 43 testIPv6 = TestIPVersion{ 44 version: "IPv6", 45 formatHostPort: "[2001:DB8::%d]:%d", 46 formatHost: "[2001:DB8::%d]", 47 machineFormatHost: "2001:DB8::%d", 48 extraHostPort: "[2001:DB8::99:99]:9876", 49 extraHost: "2001:DB8::99:13", 50 extraAddress: "[2001:DB8::99:13]:1234", 51 addressType: network.IPv6Address, 52 } 53 ) 54 55 // DoTestForIPv4AndIPv6 runs the passed test for IPv4 and IPv6. 56 func DoTestForIPv4AndIPv6(t func(ipVersion TestIPVersion)) { 57 t(testIPv4) 58 t(testIPv6) 59 } 60 61 type workerSuite struct { 62 coretesting.BaseSuite 63 } 64 65 var _ = gc.Suite(&workerSuite{}) 66 67 func (s *workerSuite) SetUpTest(c *gc.C) { 68 s.BaseSuite.SetUpTest(c) 69 resetErrors() 70 } 71 72 // InitState initializes the fake state with a single 73 // replicaset member and numMachines machines 74 // primed to vote. 75 func InitState(c *gc.C, st *fakeState, numMachines int, ipVersion TestIPVersion) { 76 var ids []string 77 for i := 10; i < 10+numMachines; i++ { 78 id := fmt.Sprint(i) 79 m := st.addMachine(id, true) 80 m.setInstanceId(instance.Id("id-" + id)) 81 m.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, i, mongoPort)) 82 ids = append(ids, id) 83 c.Assert(m.MongoHostPorts(), gc.HasLen, 1) 84 85 m.setAPIHostPorts(network.NewHostPorts( 86 apiPort, fmt.Sprintf(ipVersion.formatHost, i), 87 )) 88 } 89 st.machine("10").SetHasVote(true) 90 st.setStateServers(ids...) 91 st.session.Set(mkMembers("0v", ipVersion)) 92 st.session.setStatus(mkStatuses("0p", ipVersion)) 93 st.check = checkInvariants 94 } 95 96 // ExpectedAPIHostPorts returns the expected addresses 97 // of the machines as created by InitState. 98 func ExpectedAPIHostPorts(n int, ipVersion TestIPVersion) [][]network.HostPort { 99 servers := make([][]network.HostPort, n) 100 for i := range servers { 101 servers[i] = network.NewHostPorts( 102 apiPort, 103 fmt.Sprintf(ipVersion.formatHost, i+10), 104 ) 105 } 106 return servers 107 } 108 109 func (s *workerSuite) TestSetsAndUpdatesMembers(c *gc.C) { 110 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 111 s.PatchValue(&pollInterval, 5*time.Millisecond) 112 113 st := NewFakeState() 114 InitState(c, st, 3, ipVersion) 115 116 memberWatcher := st.session.members.Watch() 117 mustNext(c, memberWatcher) 118 assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion)) 119 120 logger.Infof("starting worker") 121 w := newWorker(st, noPublisher{}) 122 defer func() { 123 c.Check(worker.Stop(w), gc.IsNil) 124 }() 125 126 // Wait for the worker to set the initial members. 127 mustNext(c, memberWatcher) 128 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion)) 129 130 // Update the status of the new members 131 // and check that they become voting. 132 c.Logf("updating new member status") 133 st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion)) 134 mustNext(c, memberWatcher) 135 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v", ipVersion)) 136 137 c.Logf("adding another machine") 138 // Add another machine. 139 m13 := st.addMachine("13", false) 140 m13.setStateHostPort(fmt.Sprintf(ipVersion.formatHostPort, 13, mongoPort)) 141 st.setStateServers("10", "11", "12", "13") 142 143 c.Logf("waiting for new member to be added") 144 mustNext(c, memberWatcher) 145 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion)) 146 147 // Remove vote from an existing member; 148 // and give it to the new machine. 149 // Also set the status of the new machine to 150 // healthy. 151 c.Logf("removing vote from machine 10 and adding it to machine 13") 152 st.machine("10").setWantsVote(false) 153 st.machine("13").setWantsVote(true) 154 155 st.session.setStatus(mkStatuses("0p 1s 2s 3s", ipVersion)) 156 157 // Check that the new machine gets the vote and the 158 // old machine loses it. 159 c.Logf("waiting for vote switch") 160 mustNext(c, memberWatcher) 161 assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion)) 162 163 c.Logf("removing old machine") 164 // Remove the old machine. 165 st.removeMachine("10") 166 st.setStateServers("11", "12", "13") 167 168 // Check that it's removed from the members. 169 c.Logf("waiting for removal") 170 mustNext(c, memberWatcher) 171 assertMembers(c, memberWatcher.Value(), mkMembers("1v 2v 3v", ipVersion)) 172 }) 173 } 174 175 func (s *workerSuite) TestHasVoteMaintainedEvenWhenReplicaSetFails(c *gc.C) { 176 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 177 st := NewFakeState() 178 179 // Simulate a state where we have four state servers, 180 // one has gone down, and we're replacing it: 181 // 0 - hasvote true, wantsvote false, down 182 // 1 - hasvote true, wantsvote true 183 // 2 - hasvote true, wantsvote true 184 // 3 - hasvote false, wantsvote true 185 // 186 // When it starts, the worker should move the vote from 187 // 0 to 3. We'll arrange things so that it will succeed in 188 // setting the membership but fail setting the HasVote 189 // to false. 190 InitState(c, st, 4, ipVersion) 191 st.machine("10").SetHasVote(true) 192 st.machine("11").SetHasVote(true) 193 st.machine("12").SetHasVote(true) 194 st.machine("13").SetHasVote(false) 195 196 st.machine("10").setWantsVote(false) 197 st.machine("11").setWantsVote(true) 198 st.machine("12").setWantsVote(true) 199 st.machine("13").setWantsVote(true) 200 201 st.session.Set(mkMembers("0v 1v 2v 3", ipVersion)) 202 st.session.setStatus(mkStatuses("0H 1p 2s 3s", ipVersion)) 203 204 // Make the worker fail to set HasVote to false 205 // after changing the replica set membership. 206 setErrorFor("Machine.SetHasVote * false", errors.New("frood")) 207 208 memberWatcher := st.session.members.Watch() 209 mustNext(c, memberWatcher) 210 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1v 2v 3", ipVersion)) 211 212 w := newWorker(st, noPublisher{}) 213 done := make(chan error) 214 go func() { 215 done <- w.Wait() 216 }() 217 218 // Wait for the worker to set the initial members. 219 mustNext(c, memberWatcher) 220 assertMembers(c, memberWatcher.Value(), mkMembers("0 1v 2v 3v", ipVersion)) 221 222 // The worker should encounter an error setting the 223 // has-vote status to false and exit. 224 select { 225 case err := <-done: 226 c.Assert(err, gc.ErrorMatches, `cannot set voting status of "[0-9]+" to false: frood`) 227 case <-time.After(coretesting.LongWait): 228 c.Fatalf("timed out waiting for worker to exit") 229 } 230 231 // Start the worker again - although the membership should 232 // not change, the HasVote status should be updated correctly. 233 resetErrors() 234 w = newWorker(st, noPublisher{}) 235 236 // Watch all the machines for changes, so we can check 237 // their has-vote status without polling. 238 changed := make(chan struct{}, 1) 239 for i := 10; i < 14; i++ { 240 watcher := st.machine(fmt.Sprint(i)).val.Watch() 241 defer watcher.Close() 242 go func() { 243 for watcher.Next() { 244 select { 245 case changed <- struct{}{}: 246 default: 247 } 248 } 249 }() 250 } 251 timeout := time.After(coretesting.LongWait) 252 loop: 253 for { 254 select { 255 case <-changed: 256 correct := true 257 for i := 10; i < 14; i++ { 258 hasVote := st.machine(fmt.Sprint(i)).HasVote() 259 expectHasVote := i != 10 260 if hasVote != expectHasVote { 261 correct = false 262 } 263 } 264 if correct { 265 break loop 266 } 267 case <-timeout: 268 c.Fatalf("timed out waiting for vote to be set") 269 } 270 } 271 }) 272 } 273 274 func (s *workerSuite) TestAddressChange(c *gc.C) { 275 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 276 st := NewFakeState() 277 InitState(c, st, 3, ipVersion) 278 279 memberWatcher := st.session.members.Watch() 280 mustNext(c, memberWatcher) 281 assertMembers(c, memberWatcher.Value(), mkMembers("0v", ipVersion)) 282 283 logger.Infof("starting worker") 284 w := newWorker(st, noPublisher{}) 285 defer func() { 286 c.Check(worker.Stop(w), gc.IsNil) 287 }() 288 289 // Wait for the worker to set the initial members. 290 mustNext(c, memberWatcher) 291 assertMembers(c, memberWatcher.Value(), mkMembers("0v 1 2", ipVersion)) 292 293 // Change an address and wait for it to be changed in the 294 // members. 295 st.machine("11").setStateHostPort(ipVersion.extraHostPort) 296 297 mustNext(c, memberWatcher) 298 expectMembers := mkMembers("0v 1 2", ipVersion) 299 expectMembers[1].Address = ipVersion.extraHostPort 300 assertMembers(c, memberWatcher.Value(), expectMembers) 301 resetErrors() 302 }) 303 } 304 305 var fatalErrorsTests = []struct { 306 errPattern string 307 err error 308 expectErr string 309 }{{ 310 errPattern: "State.StateServerInfo", 311 expectErr: "cannot get state server info: sample", 312 }, { 313 errPattern: "Machine.SetHasVote 11 true", 314 expectErr: `cannot set voting status of "11" to true: sample`, 315 }, { 316 errPattern: "Session.CurrentStatus", 317 expectErr: "cannot get replica set status: sample", 318 }, { 319 errPattern: "Session.CurrentMembers", 320 expectErr: "cannot get replica set members: sample", 321 }, { 322 errPattern: "State.Machine *", 323 expectErr: `cannot get machine "10": sample`, 324 }, { 325 errPattern: "Machine.InstanceId *", 326 expectErr: `cannot get API server info: sample`, 327 }} 328 329 func (s *workerSuite) TestFatalErrors(c *gc.C) { 330 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 331 s.PatchValue(&pollInterval, 5*time.Millisecond) 332 for i, testCase := range fatalErrorsTests { 333 c.Logf("test %d: %s -> %s", i, testCase.errPattern, testCase.expectErr) 334 resetErrors() 335 st := NewFakeState() 336 st.session.InstantlyReady = true 337 InitState(c, st, 3, ipVersion) 338 setErrorFor(testCase.errPattern, errors.New("sample")) 339 w := newWorker(st, noPublisher{}) 340 done := make(chan error) 341 go func() { 342 done <- w.Wait() 343 }() 344 select { 345 case err := <-done: 346 c.Assert(err, gc.ErrorMatches, testCase.expectErr) 347 case <-time.After(coretesting.LongWait): 348 c.Fatalf("timed out waiting for error") 349 } 350 } 351 }) 352 } 353 354 func (s *workerSuite) TestSetMembersErrorIsNotFatal(c *gc.C) { 355 coretesting.SkipIfI386(c, "lp:1425569") 356 357 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 358 st := NewFakeState() 359 InitState(c, st, 3, ipVersion) 360 st.session.setStatus(mkStatuses("0p 1s 2s", ipVersion)) 361 var setCount voyeur.Value 362 setErrorFuncFor("Session.Set", func() error { 363 setCount.Set(true) 364 return errors.New("sample") 365 }) 366 s.PatchValue(&initialRetryInterval, 10*time.Microsecond) 367 s.PatchValue(&maxRetryInterval, coretesting.ShortWait/4) 368 369 w := newWorker(st, noPublisher{}) 370 defer func() { 371 c.Check(worker.Stop(w), gc.IsNil) 372 }() 373 374 // See that the worker is retrying. 375 setCountW := setCount.Watch() 376 mustNext(c, setCountW) 377 mustNext(c, setCountW) 378 mustNext(c, setCountW) 379 380 resetErrors() 381 }) 382 } 383 384 type PublisherFunc func(apiServers [][]network.HostPort, instanceIds []instance.Id) error 385 386 func (f PublisherFunc) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 387 return f(apiServers, instanceIds) 388 } 389 390 func (s *workerSuite) TestStateServersArePublished(c *gc.C) { 391 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 392 publishCh := make(chan [][]network.HostPort) 393 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 394 publishCh <- apiServers 395 return nil 396 } 397 398 st := NewFakeState() 399 InitState(c, st, 3, ipVersion) 400 w := newWorker(st, PublisherFunc(publish)) 401 defer func() { 402 c.Check(worker.Stop(w), gc.IsNil) 403 }() 404 select { 405 case servers := <-publishCh: 406 AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion)) 407 case <-time.After(coretesting.LongWait): 408 c.Fatalf("timed out waiting for publish") 409 } 410 411 // Change one of the servers' API addresses and check that it's published. 412 var newMachine10APIHostPorts []network.HostPort 413 newMachine10APIHostPorts = network.NewHostPorts(apiPort, ipVersion.extraHostPort) 414 st.machine("10").setAPIHostPorts(newMachine10APIHostPorts) 415 select { 416 case servers := <-publishCh: 417 expected := ExpectedAPIHostPorts(3, ipVersion) 418 expected[0] = newMachine10APIHostPorts 419 AssertAPIHostPorts(c, servers, expected) 420 case <-time.After(coretesting.LongWait): 421 c.Fatalf("timed out waiting for publish") 422 } 423 }) 424 } 425 426 func (s *workerSuite) TestWorkerRetriesOnPublishError(c *gc.C) { 427 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 428 s.PatchValue(&pollInterval, coretesting.LongWait+time.Second) 429 s.PatchValue(&initialRetryInterval, 5*time.Millisecond) 430 s.PatchValue(&maxRetryInterval, initialRetryInterval) 431 432 publishCh := make(chan [][]network.HostPort, 100) 433 434 count := 0 435 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 436 publishCh <- apiServers 437 count++ 438 if count <= 3 { 439 return fmt.Errorf("publish error") 440 } 441 return nil 442 } 443 st := NewFakeState() 444 InitState(c, st, 3, ipVersion) 445 446 w := newWorker(st, PublisherFunc(publish)) 447 defer func() { 448 c.Check(worker.Stop(w), gc.IsNil) 449 }() 450 451 for i := 0; i < 4; i++ { 452 select { 453 case servers := <-publishCh: 454 AssertAPIHostPorts(c, servers, ExpectedAPIHostPorts(3, ipVersion)) 455 case <-time.After(coretesting.LongWait): 456 c.Fatalf("timed out waiting for publish #%d", i) 457 } 458 } 459 select { 460 case <-publishCh: 461 c.Errorf("unexpected publish event") 462 case <-time.After(coretesting.ShortWait): 463 } 464 }) 465 } 466 467 func (s *workerSuite) TestWorkerPublishesInstanceIds(c *gc.C) { 468 DoTestForIPv4AndIPv6(func(ipVersion TestIPVersion) { 469 s.PatchValue(&pollInterval, coretesting.LongWait+time.Second) 470 s.PatchValue(&initialRetryInterval, 5*time.Millisecond) 471 s.PatchValue(&maxRetryInterval, initialRetryInterval) 472 473 publishCh := make(chan []instance.Id, 100) 474 475 publish := func(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 476 publishCh <- instanceIds 477 return nil 478 } 479 st := NewFakeState() 480 InitState(c, st, 3, ipVersion) 481 482 w := newWorker(st, PublisherFunc(publish)) 483 defer func() { 484 c.Check(worker.Stop(w), gc.IsNil) 485 }() 486 487 select { 488 case instanceIds := <-publishCh: 489 c.Assert(instanceIds, jc.SameContents, []instance.Id{"id-10", "id-11", "id-12"}) 490 case <-time.After(coretesting.LongWait): 491 c.Errorf("timed out waiting for publish") 492 } 493 }) 494 } 495 496 // mustNext waits for w's value to be set and returns it. 497 func mustNext(c *gc.C, w *voyeur.Watcher) (val interface{}) { 498 type voyeurResult struct { 499 ok bool 500 val interface{} 501 } 502 done := make(chan voyeurResult) 503 go func() { 504 c.Logf("mustNext %p", w) 505 ok := w.Next() 506 val = w.Value() 507 c.Logf("mustNext done %p, ok: %v, val: %#v", w, ok, val) 508 done <- voyeurResult{ok, val} 509 }() 510 select { 511 case result := <-done: 512 c.Assert(result.ok, jc.IsTrue) 513 return result.val 514 case <-time.After(coretesting.LongWait): 515 c.Fatalf("timed out waiting for value to be set") 516 } 517 panic("unreachable") 518 } 519 520 type noPublisher struct{} 521 522 func (noPublisher) publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error { 523 return nil 524 }