github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/worker/provisioner/provisioner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package provisioner_test 5 6 import ( 7 "fmt" 8 "strings" 9 "time" 10 11 "github.com/juju/errors" 12 "github.com/juju/names" 13 jc "github.com/juju/testing/checkers" 14 "github.com/juju/utils" 15 "github.com/juju/utils/set" 16 gc "launchpad.net/gocheck" 17 18 "github.com/juju/juju/constraints" 19 "github.com/juju/juju/environs" 20 "github.com/juju/juju/environs/config" 21 "github.com/juju/juju/environs/simplestreams" 22 "github.com/juju/juju/environs/tools" 23 "github.com/juju/juju/instance" 24 "github.com/juju/juju/juju/testing" 25 "github.com/juju/juju/mongo" 26 "github.com/juju/juju/network" 27 "github.com/juju/juju/provider/dummy" 28 "github.com/juju/juju/state" 29 "github.com/juju/juju/state/api" 30 "github.com/juju/juju/state/api/params" 31 apiprovisioner "github.com/juju/juju/state/api/provisioner" 32 apiserverprovisioner "github.com/juju/juju/state/apiserver/provisioner" 33 coretesting "github.com/juju/juju/testing" 34 "github.com/juju/juju/worker/provisioner" 35 ) 36 37 type CommonProvisionerSuite struct { 38 testing.JujuConnSuite 39 op <-chan dummy.Operation 40 cfg *config.Config 41 // defaultConstraints are used when adding a machine and then later in test assertions. 42 defaultConstraints constraints.Value 43 44 st *api.State 45 provisioner *apiprovisioner.State 46 } 47 48 type ProvisionerSuite struct { 49 CommonProvisionerSuite 50 } 51 52 var _ = gc.Suite(&ProvisionerSuite{}) 53 54 var veryShortAttempt = utils.AttemptStrategy{ 55 Total: 1 * time.Second, 56 Delay: 80 * time.Millisecond, 57 } 58 59 func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) { 60 s.JujuConnSuite.SetUpSuite(c) 61 s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cpu-cores=1 root-disk=8G") 62 } 63 64 func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) { 65 // Disable the default state policy, because the 66 // provisioner needs to be able to test pathological 67 // scenarios where a machine exists in state with 68 // invalid environment config. 69 dummy.SetStatePolicy(nil) 70 71 s.JujuConnSuite.SetUpTest(c) 72 // Create the operations channel with more than enough space 73 // for those tests that don't listen on it. 74 op := make(chan dummy.Operation, 500) 75 dummy.Listen(op) 76 s.op = op 77 78 cfg, err := s.State.EnvironConfig() 79 c.Assert(err, gc.IsNil) 80 s.cfg = cfg 81 } 82 83 func (s *CommonProvisionerSuite) APILogin(c *gc.C, machine *state.Machine) { 84 if s.st != nil { 85 c.Assert(s.st.Close(), gc.IsNil) 86 } 87 password, err := utils.RandomPassword() 88 c.Assert(err, gc.IsNil) 89 err = machine.SetPassword(password) 90 c.Assert(err, gc.IsNil) 91 err = machine.SetProvisioned("i-fake", "fake_nonce", nil) 92 c.Assert(err, gc.IsNil) 93 s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, "fake_nonce") 94 c.Assert(s.st, gc.NotNil) 95 c.Logf("API: login as %q successful", machine.Tag()) 96 s.provisioner = s.st.Provisioner() 97 c.Assert(s.provisioner, gc.NotNil) 98 } 99 100 // breakDummyProvider changes the environment config in state in a way 101 // that causes the given environMethod of the dummy provider to return 102 // an error, which is also returned as a message to be checked. 103 func breakDummyProvider(c *gc.C, st *state.State, environMethod string) string { 104 attrs := map[string]interface{}{"broken": environMethod} 105 err := st.UpdateEnvironConfig(attrs, nil, nil) 106 c.Assert(err, gc.IsNil) 107 return fmt.Sprintf("dummy.%s is broken", environMethod) 108 } 109 110 // setupEnvironmentManager adds an environment manager machine and login to the API. 111 func (s *CommonProvisionerSuite) setupEnvironmentManager(c *gc.C) { 112 machine, err := s.State.AddMachine("quantal", state.JobManageEnviron) 113 c.Assert(err, gc.IsNil) 114 c.Assert(machine.Id(), gc.Equals, "0") 115 err = machine.SetAddresses(network.NewAddress("0.1.2.3", network.ScopeUnknown)) 116 c.Assert(err, gc.IsNil) 117 s.APILogin(c, machine) 118 } 119 120 // invalidateEnvironment alters the environment configuration 121 // so the Settings returned from the watcher will not pass 122 // validation. 123 func (s *CommonProvisionerSuite) invalidateEnvironment(c *gc.C) { 124 st, err := state.Open(s.StateInfo(c), mongo.DefaultDialOpts(), state.Policy(nil)) 125 c.Assert(err, gc.IsNil) 126 defer st.Close() 127 attrs := map[string]interface{}{"type": "unknown"} 128 err = st.UpdateEnvironConfig(attrs, nil, nil) 129 c.Assert(err, gc.IsNil) 130 } 131 132 // fixEnvironment undoes the work of invalidateEnvironment. 133 func (s *CommonProvisionerSuite) fixEnvironment(c *gc.C) error { 134 st, err := state.Open(s.StateInfo(c), mongo.DefaultDialOpts(), state.Policy(nil)) 135 c.Assert(err, gc.IsNil) 136 defer st.Close() 137 attrs := map[string]interface{}{"type": s.cfg.AllAttrs()["type"]} 138 return st.UpdateEnvironConfig(attrs, nil, nil) 139 } 140 141 // stopper is stoppable. 142 type stopper interface { 143 Stop() error 144 } 145 146 // stop stops a stopper. 147 func stop(c *gc.C, s stopper) { 148 c.Assert(s.Stop(), gc.IsNil) 149 } 150 151 func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instance.Instance { 152 instance, _ := testing.AssertStartInstance(c, s.Conn.Environ, id) 153 select { 154 case o := <-s.op: 155 switch o := o.(type) { 156 case dummy.OpStartInstance: 157 default: 158 c.Fatalf("unexpected operation %#v", o) 159 } 160 case <-time.After(coretesting.LongWait): 161 c.Fatalf("timed out waiting for startinstance operation") 162 } 163 return instance 164 } 165 166 func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instance.Instance { 167 return s.checkStartInstanceCustom(c, m, "pork", s.defaultConstraints, nil, nil, true) 168 } 169 170 func (s *CommonProvisionerSuite) checkStartInstanceCustom(c *gc.C, m *state.Machine, secret string, cons constraints.Value, networks []string, networkInfo []network.Info, waitInstanceId bool) (inst instance.Instance) { 171 s.BackingState.StartSync() 172 for { 173 select { 174 case o := <-s.op: 175 switch o := o.(type) { 176 case dummy.OpStartInstance: 177 inst = o.Instance 178 if waitInstanceId { 179 s.waitInstanceId(c, m, inst.Id()) 180 } 181 182 // Check the instance was started with the expected params. 183 c.Assert(o.MachineId, gc.Equals, m.Id()) 184 nonceParts := strings.SplitN(o.MachineNonce, ":", 2) 185 c.Assert(nonceParts, gc.HasLen, 2) 186 c.Assert(nonceParts[0], gc.Equals, names.NewMachineTag("0").String()) 187 c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString) 188 c.Assert(o.Secret, gc.Equals, secret) 189 c.Assert(o.Networks, jc.DeepEquals, networks) 190 c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo) 191 192 // All provisioned machines in this test suite have 193 // their hardware characteristics attributes set to 194 // the same values as the constraints due to the dummy 195 // environment being used. 196 if !constraints.IsEmpty(&cons) { 197 c.Assert(o.Constraints, gc.DeepEquals, cons) 198 hc, err := m.HardwareCharacteristics() 199 c.Assert(err, gc.IsNil) 200 c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{ 201 Arch: cons.Arch, 202 Mem: cons.Mem, 203 RootDisk: cons.RootDisk, 204 CpuCores: cons.CpuCores, 205 CpuPower: cons.CpuPower, 206 Tags: cons.Tags, 207 }) 208 } 209 return 210 default: 211 c.Logf("ignoring unexpected operation %#v", o) 212 } 213 case <-time.After(2 * time.Second): 214 c.Fatalf("provisioner did not start an instance") 215 return 216 } 217 } 218 return 219 } 220 221 // checkNoOperations checks that the environ was not operated upon. 222 func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) { 223 s.BackingState.StartSync() 224 select { 225 case o := <-s.op: 226 c.Fatalf("unexpected operation %#v", o) 227 case <-time.After(coretesting.ShortWait): 228 return 229 } 230 } 231 232 // checkStopInstances checks that an instance has been stopped. 233 func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instance.Instance) { 234 s.checkStopSomeInstances(c, instances, nil) 235 } 236 237 // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not. 238 func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C, 239 instancesToStop []instance.Instance, instancesToKeep []instance.Instance) { 240 241 s.BackingState.StartSync() 242 instanceIdsToStop := set.NewStrings() 243 for _, instance := range instancesToStop { 244 instanceIdsToStop.Add(string(instance.Id())) 245 } 246 instanceIdsToKeep := set.NewStrings() 247 for _, instance := range instancesToKeep { 248 instanceIdsToKeep.Add(string(instance.Id())) 249 } 250 // Continue checking for stop instance calls until all the instances we 251 // are waiting on to finish, actually finish, or we time out. 252 for !instanceIdsToStop.IsEmpty() { 253 select { 254 case o := <-s.op: 255 switch o := o.(type) { 256 case dummy.OpStopInstances: 257 for _, id := range o.Ids { 258 instId := string(id) 259 instanceIdsToStop.Remove(instId) 260 if instanceIdsToKeep.Contains(instId) { 261 c.Errorf("provisioner unexpectedly stopped instance %s", instId) 262 } 263 } 264 default: 265 c.Fatalf("unexpected operation %#v", o) 266 return 267 } 268 case <-time.After(2 * time.Second): 269 c.Fatalf("provisioner did not stop an instance") 270 return 271 } 272 } 273 } 274 275 func (s *CommonProvisionerSuite) waitMachine(c *gc.C, m *state.Machine, check func() bool) { 276 // TODO(jam): We need to grow a new method on NotifyWatcherC 277 // that calls StartSync while waiting for changes, then 278 // waitMachine and waitHardwareCharacteristics can use that 279 // instead 280 w := m.Watch() 281 defer stop(c, w) 282 timeout := time.After(coretesting.LongWait) 283 resync := time.After(0) 284 for { 285 select { 286 case <-w.Changes(): 287 if check() { 288 return 289 } 290 case <-resync: 291 resync = time.After(coretesting.ShortWait) 292 s.BackingState.StartSync() 293 case <-timeout: 294 c.Fatalf("machine %v wait timed out", m) 295 } 296 } 297 } 298 299 func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) { 300 w := m.WatchHardwareCharacteristics() 301 defer stop(c, w) 302 timeout := time.After(coretesting.LongWait) 303 resync := time.After(0) 304 for { 305 select { 306 case <-w.Changes(): 307 if check() { 308 return 309 } 310 case <-resync: 311 resync = time.After(coretesting.ShortWait) 312 s.BackingState.StartSync() 313 case <-timeout: 314 c.Fatalf("hardware characteristics for machine %v wait timed out", m) 315 } 316 } 317 } 318 319 // waitRemoved waits for the supplied machine to be removed from state. 320 func (s *CommonProvisionerSuite) waitRemoved(c *gc.C, m *state.Machine) { 321 s.waitMachine(c, m, func() bool { 322 err := m.Refresh() 323 if errors.IsNotFound(err) { 324 return true 325 } 326 c.Assert(err, gc.IsNil) 327 c.Logf("machine %v is still %s", m, m.Life()) 328 return false 329 }) 330 } 331 332 // waitInstanceId waits until the supplied machine has an instance id, then 333 // asserts it is as expected. 334 func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) { 335 s.waitHardwareCharacteristics(c, m, func() bool { 336 if actual, err := m.InstanceId(); err == nil { 337 c.Assert(actual, gc.Equals, expect) 338 return true 339 } else if !state.IsNotProvisionedError(err) { 340 // We don't expect any errors. 341 panic(err) 342 } 343 c.Logf("machine %v is still unprovisioned", m) 344 return false 345 }) 346 } 347 348 func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner { 349 machineTag := "machine-0" 350 agentConfig := s.AgentConfigForTag(c, machineTag) 351 return provisioner.NewEnvironProvisioner(s.provisioner, agentConfig) 352 } 353 354 func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) { 355 return s.addMachineWithRequestedNetworks(nil, s.defaultConstraints) 356 } 357 358 func (s *CommonProvisionerSuite) addMachineWithRequestedNetworks(networks []string, cons constraints.Value) (*state.Machine, error) { 359 return s.BackingState.AddOneMachine(state.MachineTemplate{ 360 Series: coretesting.FakeDefaultSeries, 361 Jobs: []state.MachineJob{state.JobHostUnits}, 362 Constraints: cons, 363 RequestedNetworks: networks, 364 }) 365 } 366 367 func (s *ProvisionerSuite) SetUpTest(c *gc.C) { 368 s.CommonProvisionerSuite.SetUpTest(c) 369 s.CommonProvisionerSuite.setupEnvironmentManager(c) 370 } 371 372 func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) { 373 p := s.newEnvironProvisioner(c) 374 c.Assert(p.Stop(), gc.IsNil) 375 } 376 377 func (s *ProvisionerSuite) TestSimple(c *gc.C) { 378 p := s.newEnvironProvisioner(c) 379 defer stop(c, p) 380 381 // Check that an instance is provisioned when the machine is created... 382 m, err := s.addMachine() 383 c.Assert(err, gc.IsNil) 384 instance := s.checkStartInstance(c, m) 385 386 // ...and removed, along with the machine, when the machine is Dead. 387 c.Assert(m.EnsureDead(), gc.IsNil) 388 s.checkStopInstances(c, instance) 389 s.waitRemoved(c, m) 390 } 391 392 func (s *ProvisionerSuite) TestConstraints(c *gc.C) { 393 // Create a machine with non-standard constraints. 394 m, err := s.addMachine() 395 c.Assert(err, gc.IsNil) 396 cons := constraints.MustParse("mem=8G arch=amd64 cpu-cores=2 root-disk=10G") 397 err = m.SetConstraints(cons) 398 c.Assert(err, gc.IsNil) 399 400 // Start a provisioner and check those constraints are used. 401 p := s.newEnvironProvisioner(c) 402 defer stop(c, p) 403 s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, true) 404 } 405 406 func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) { 407 p := s.newEnvironProvisioner(c) 408 defer stop(c, p) 409 410 // Check that an instance is not provisioned when the machine is created... 411 m, err := s.BackingState.AddOneMachine(state.MachineTemplate{ 412 // We need a valid series that has no tools uploaded 413 Series: "raring", 414 Jobs: []state.MachineJob{state.JobHostUnits}, 415 Constraints: s.defaultConstraints, 416 }) 417 c.Assert(err, gc.IsNil) 418 s.checkNoOperations(c) 419 420 t0 := time.Now() 421 for time.Since(t0) < coretesting.LongWait { 422 // And check the machine status is set to error. 423 status, info, _, err := m.Status() 424 c.Assert(err, gc.IsNil) 425 if status == params.StatusPending { 426 time.Sleep(coretesting.ShortWait) 427 continue 428 } 429 c.Assert(status, gc.Equals, params.StatusError) 430 c.Assert(info, gc.Equals, "no matching tools available") 431 break 432 } 433 434 // Restart the PA to make sure the machine is skipped again. 435 stop(c, p) 436 p = s.newEnvironProvisioner(c) 437 defer stop(c, p) 438 s.checkNoOperations(c) 439 } 440 441 func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenStartInstanceFailed(c *gc.C) { 442 brokenMsg := breakDummyProvider(c, s.State, "StartInstance") 443 p := s.newEnvironProvisioner(c) 444 defer stop(c, p) 445 446 // Check that an instance is not provisioned when the machine is created... 447 m, err := s.addMachine() 448 c.Assert(err, gc.IsNil) 449 s.checkNoOperations(c) 450 451 t0 := time.Now() 452 for time.Since(t0) < coretesting.LongWait { 453 // And check the machine status is set to error. 454 status, info, _, err := m.Status() 455 c.Assert(err, gc.IsNil) 456 if status == params.StatusPending { 457 time.Sleep(coretesting.ShortWait) 458 continue 459 } 460 c.Assert(status, gc.Equals, params.StatusError) 461 c.Assert(info, gc.Equals, brokenMsg) 462 break 463 } 464 465 // Unbreak the environ config. 466 err = s.fixEnvironment(c) 467 c.Assert(err, gc.IsNil) 468 469 // Restart the PA to make sure the machine is skipped again. 470 stop(c, p) 471 p = s.newEnvironProvisioner(c) 472 defer stop(c, p) 473 s.checkNoOperations(c) 474 } 475 476 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForContainers(c *gc.C) { 477 p := s.newEnvironProvisioner(c) 478 defer stop(c, p) 479 480 // create a machine to host the container. 481 m, err := s.addMachine() 482 c.Assert(err, gc.IsNil) 483 inst := s.checkStartInstance(c, m) 484 485 // make a container on the machine we just created 486 template := state.MachineTemplate{ 487 Series: coretesting.FakeDefaultSeries, 488 Jobs: []state.MachineJob{state.JobHostUnits}, 489 } 490 container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXC) 491 c.Assert(err, gc.IsNil) 492 493 // the PA should not attempt to create it 494 s.checkNoOperations(c) 495 496 // cleanup 497 c.Assert(container.EnsureDead(), gc.IsNil) 498 c.Assert(container.Remove(), gc.IsNil) 499 c.Assert(m.EnsureDead(), gc.IsNil) 500 s.checkStopInstances(c, inst) 501 s.waitRemoved(c, m) 502 } 503 504 func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedNetworks(c *gc.C) { 505 p := s.newEnvironProvisioner(c) 506 defer stop(c, p) 507 508 // Add and provision a machine with networks specified. 509 requestedNetworks := []string{"net1", "net2"} 510 cons := constraints.MustParse(s.defaultConstraints.String(), "networks=^net3,^net4") 511 expectNetworkInfo := []network.Info{{ 512 MACAddress: "aa:bb:cc:dd:ee:f0", 513 InterfaceName: "eth0", 514 ProviderId: "net1", 515 NetworkName: "net1", 516 VLANTag: 0, 517 CIDR: "0.1.2.0/24", 518 }, { 519 MACAddress: "aa:bb:cc:dd:ee:f1", 520 InterfaceName: "eth1", 521 ProviderId: "net2", 522 NetworkName: "net2", 523 VLANTag: 1, 524 CIDR: "0.2.2.0/24", 525 }} 526 m, err := s.addMachineWithRequestedNetworks(requestedNetworks, cons) 527 c.Assert(err, gc.IsNil) 528 inst := s.checkStartInstanceCustom( 529 c, m, "pork", cons, 530 requestedNetworks, 531 expectNetworkInfo, true) 532 533 _, err = s.State.Network("net1") 534 c.Assert(err, gc.IsNil) 535 _, err = s.State.Network("net2") 536 c.Assert(err, gc.IsNil) 537 _, err = s.State.Network("net3") 538 c.Assert(err, jc.Satisfies, errors.IsNotFound) 539 _, err = s.State.Network("net4") 540 c.Assert(err, jc.Satisfies, errors.IsNotFound) 541 ifaces, err := m.NetworkInterfaces() 542 c.Assert(err, gc.IsNil) 543 c.Assert(ifaces, gc.HasLen, 2) 544 545 // Cleanup. 546 c.Assert(m.EnsureDead(), gc.IsNil) 547 s.checkStopInstances(c, inst) 548 s.waitRemoved(c, m) 549 } 550 551 func (s *ProvisionerSuite) TestSetInstanceInfoFailureSetsErrorStatusAndStopsInstanceButKeepsGoing(c *gc.C) { 552 p := s.newEnvironProvisioner(c) 553 defer stop(c, p) 554 555 // Add and provision a machine with networks specified. 556 networks := []string{"bad-net1"} 557 // "bad-" prefix for networks causes dummy provider to report 558 // invalid network.Info. 559 expectNetworkInfo := []network.Info{ 560 {ProviderId: "bad-net1", NetworkName: "bad-net1", CIDR: "invalid"}, 561 } 562 m, err := s.addMachineWithRequestedNetworks(networks, constraints.Value{}) 563 c.Assert(err, gc.IsNil) 564 inst := s.checkStartInstanceCustom( 565 c, m, "pork", constraints.Value{}, 566 networks, expectNetworkInfo, false) 567 568 // Ensure machine error status was set. 569 t0 := time.Now() 570 for time.Since(t0) < coretesting.LongWait { 571 // And check the machine status is set to error. 572 status, info, _, err := m.Status() 573 c.Assert(err, gc.IsNil) 574 if status == params.StatusPending { 575 time.Sleep(coretesting.ShortWait) 576 continue 577 } 578 c.Assert(status, gc.Equals, params.StatusError) 579 c.Assert(info, gc.Matches, `aborted instance "dummyenv-0": cannot add network "bad-net1": invalid CIDR address: invalid`) 580 break 581 } 582 s.checkStopInstances(c, inst) 583 584 // Make sure the task didn't stop with an error 585 died := make(chan error) 586 go func() { 587 died <- p.Wait() 588 }() 589 select { 590 case <-time.After(coretesting.LongWait): 591 case err = <-died: 592 c.Fatalf("provisioner task died unexpectedly with err: %v", err) 593 } 594 595 // Restart the PA to make sure the machine is not retried. 596 stop(c, p) 597 p = s.newEnvironProvisioner(c) 598 defer stop(c, p) 599 600 s.checkNoOperations(c) 601 } 602 603 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurWithAnInvalidEnvironment(c *gc.C) { 604 s.invalidateEnvironment(c) 605 606 p := s.newEnvironProvisioner(c) 607 defer stop(c, p) 608 609 // try to create a machine 610 _, err := s.addMachine() 611 c.Assert(err, gc.IsNil) 612 613 // the PA should not create it 614 s.checkNoOperations(c) 615 } 616 617 func (s *ProvisionerSuite) TestProvisioningOccursWithFixedEnvironment(c *gc.C) { 618 s.invalidateEnvironment(c) 619 620 p := s.newEnvironProvisioner(c) 621 defer stop(c, p) 622 623 // try to create a machine 624 m, err := s.addMachine() 625 c.Assert(err, gc.IsNil) 626 627 // the PA should not create it 628 s.checkNoOperations(c) 629 630 err = s.fixEnvironment(c) 631 c.Assert(err, gc.IsNil) 632 633 s.checkStartInstance(c, m) 634 } 635 636 func (s *ProvisionerSuite) TestProvisioningDoesOccurAfterInvalidEnvironmentPublished(c *gc.C) { 637 p := s.newEnvironProvisioner(c) 638 defer stop(c, p) 639 640 // place a new machine into the state 641 m, err := s.addMachine() 642 c.Assert(err, gc.IsNil) 643 644 s.checkStartInstance(c, m) 645 646 s.invalidateEnvironment(c) 647 648 // create a second machine 649 m, err = s.addMachine() 650 c.Assert(err, gc.IsNil) 651 652 // the PA should create it using the old environment 653 s.checkStartInstance(c, m) 654 } 655 656 func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) { 657 p := s.newEnvironProvisioner(c) 658 defer stop(c, p) 659 660 // create a machine 661 m, err := s.addMachine() 662 c.Assert(err, gc.IsNil) 663 s.checkStartInstance(c, m) 664 665 // restart the PA 666 stop(c, p) 667 p = s.newEnvironProvisioner(c) 668 defer stop(c, p) 669 670 // check that there is only one machine provisioned. 671 machines, err := s.State.AllMachines() 672 c.Assert(err, gc.IsNil) 673 c.Check(len(machines), gc.Equals, 2) 674 c.Check(machines[0].Id(), gc.Equals, "0") 675 c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse) 676 677 // the PA should not create it a second time 678 s.checkNoOperations(c) 679 } 680 681 func (s *ProvisionerSuite) TestProvisioningStopsInstances(c *gc.C) { 682 p := s.newEnvironProvisioner(c) 683 defer stop(c, p) 684 685 // create a machine 686 m0, err := s.addMachine() 687 c.Assert(err, gc.IsNil) 688 i0 := s.checkStartInstance(c, m0) 689 690 // create a second machine 691 m1, err := s.addMachine() 692 c.Assert(err, gc.IsNil) 693 i1 := s.checkStartInstance(c, m1) 694 stop(c, p) 695 696 // mark the first machine as dead 697 c.Assert(m0.EnsureDead(), gc.IsNil) 698 699 // remove the second machine entirely 700 c.Assert(m1.EnsureDead(), gc.IsNil) 701 c.Assert(m1.Remove(), gc.IsNil) 702 703 // start a new provisioner to shut them both down 704 p = s.newEnvironProvisioner(c) 705 defer stop(c, p) 706 s.checkStopInstances(c, i0, i1) 707 s.waitRemoved(c, m0) 708 } 709 710 func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) { 711 p := s.newEnvironProvisioner(c) 712 defer stop(c, p) 713 714 // provision a machine 715 m0, err := s.addMachine() 716 c.Assert(err, gc.IsNil) 717 s.checkStartInstance(c, m0) 718 719 // stop the provisioner and make the machine dying 720 stop(c, p) 721 err = m0.Destroy() 722 c.Assert(err, gc.IsNil) 723 724 // add a new, dying, unprovisioned machine 725 m1, err := s.addMachine() 726 c.Assert(err, gc.IsNil) 727 err = m1.Destroy() 728 c.Assert(err, gc.IsNil) 729 730 // start the provisioner and wait for it to reap the useless machine 731 p = s.newEnvironProvisioner(c) 732 defer stop(c, p) 733 s.checkNoOperations(c) 734 s.waitRemoved(c, m1) 735 736 // verify the other one's still fine 737 err = m0.Refresh() 738 c.Assert(err, gc.IsNil) 739 c.Assert(m0.Life(), gc.Equals, state.Dying) 740 } 741 742 func (s *ProvisionerSuite) TestProvisioningRecoversAfterInvalidEnvironmentPublished(c *gc.C) { 743 p := s.newEnvironProvisioner(c) 744 defer stop(c, p) 745 746 // place a new machine into the state 747 m, err := s.addMachine() 748 c.Assert(err, gc.IsNil) 749 s.checkStartInstance(c, m) 750 751 s.invalidateEnvironment(c) 752 s.BackingState.StartSync() 753 754 // create a second machine 755 m, err = s.addMachine() 756 c.Assert(err, gc.IsNil) 757 758 // the PA should create it using the old environment 759 s.checkStartInstance(c, m) 760 761 err = s.fixEnvironment(c) 762 c.Assert(err, gc.IsNil) 763 764 // insert our observer 765 cfgObserver := make(chan *config.Config, 1) 766 provisioner.SetObserver(p, cfgObserver) 767 768 err = s.State.UpdateEnvironConfig(map[string]interface{}{"secret": "beef"}, nil, nil) 769 c.Assert(err, gc.IsNil) 770 771 s.BackingState.StartSync() 772 773 // wait for the PA to load the new configuration 774 select { 775 case <-cfgObserver: 776 case <-time.After(coretesting.LongWait): 777 c.Fatalf("PA did not action config change") 778 } 779 780 // create a third machine 781 m, err = s.addMachine() 782 c.Assert(err, gc.IsNil) 783 784 // the PA should create it using the new environment 785 s.checkStartInstanceCustom(c, m, "beef", s.defaultConstraints, nil, nil, true) 786 } 787 788 func (s *ProvisionerSuite) TestProvisioningSafeMode(c *gc.C) { 789 p := s.newEnvironProvisioner(c) 790 defer stop(c, p) 791 792 // create a machine 793 m0, err := s.addMachine() 794 c.Assert(err, gc.IsNil) 795 i0 := s.checkStartInstance(c, m0) 796 797 // create a second machine 798 m1, err := s.addMachine() 799 c.Assert(err, gc.IsNil) 800 i1 := s.checkStartInstance(c, m1) 801 stop(c, p) 802 803 // mark the first machine as dead 804 c.Assert(m0.EnsureDead(), gc.IsNil) 805 806 // remove the second machine entirely from state 807 c.Assert(m1.EnsureDead(), gc.IsNil) 808 c.Assert(m1.Remove(), gc.IsNil) 809 810 // turn on safe mode 811 attrs := map[string]interface{}{"provisioner-safe-mode": true} 812 err = s.State.UpdateEnvironConfig(attrs, nil, nil) 813 c.Assert(err, gc.IsNil) 814 815 // start a new provisioner to shut down only the machine still in state. 816 p = s.newEnvironProvisioner(c) 817 defer stop(c, p) 818 s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1}) 819 s.waitRemoved(c, m0) 820 } 821 822 func (s *ProvisionerSuite) TestProvisioningSafeModeChange(c *gc.C) { 823 p := s.newEnvironProvisioner(c) 824 defer stop(c, p) 825 826 // First check that safe mode is initially off. 827 828 // create a machine 829 m0, err := s.addMachine() 830 c.Assert(err, gc.IsNil) 831 i0 := s.checkStartInstance(c, m0) 832 833 // create a second machine 834 m1, err := s.addMachine() 835 c.Assert(err, gc.IsNil) 836 i1 := s.checkStartInstance(c, m1) 837 838 // mark the first machine as dead 839 c.Assert(m0.EnsureDead(), gc.IsNil) 840 841 // remove the second machine entirely from state 842 c.Assert(m1.EnsureDead(), gc.IsNil) 843 c.Assert(m1.Remove(), gc.IsNil) 844 845 s.checkStopInstances(c, i0, i1) 846 s.waitRemoved(c, m0) 847 848 // insert our observer 849 cfgObserver := make(chan *config.Config, 1) 850 provisioner.SetObserver(p, cfgObserver) 851 852 // turn on safe mode 853 attrs := map[string]interface{}{"provisioner-safe-mode": true} 854 err = s.State.UpdateEnvironConfig(attrs, nil, nil) 855 c.Assert(err, gc.IsNil) 856 857 s.BackingState.StartSync() 858 859 // wait for the PA to load the new configuration 860 select { 861 case <-cfgObserver: 862 case <-time.After(coretesting.LongWait): 863 c.Fatalf("PA did not action config change") 864 } 865 866 // Now check that the provisioner has noticed safe mode is on. 867 868 // create a machine 869 m3, err := s.addMachine() 870 c.Assert(err, gc.IsNil) 871 i3 := s.checkStartInstance(c, m3) 872 873 // create an instance out of band 874 i4 := s.startUnknownInstance(c, "999") 875 876 // mark the machine as dead 877 c.Assert(m3.EnsureDead(), gc.IsNil) 878 879 // check the machine's instance is stopped, and the other isn't 880 s.checkStopSomeInstances(c, []instance.Instance{i3}, []instance.Instance{i4}) 881 s.waitRemoved(c, m3) 882 } 883 884 func (s *ProvisionerSuite) newProvisionerTask(c *gc.C, safeMode bool, broker environs.InstanceBroker) provisioner.ProvisionerTask { 885 machineWatcher, err := s.provisioner.WatchEnvironMachines() 886 c.Assert(err, gc.IsNil) 887 retryWatcher, err := s.provisioner.WatchMachineErrorRetry() 888 c.Assert(err, gc.IsNil) 889 auth, err := environs.NewAPIAuthenticator(s.provisioner) 890 c.Assert(err, gc.IsNil) 891 return provisioner.NewProvisionerTask( 892 "machine-0", safeMode, s.provisioner, 893 machineWatcher, retryWatcher, broker, auth) 894 } 895 896 func (s *ProvisionerSuite) TestTurningOffSafeModeReapsUnknownInstances(c *gc.C) { 897 task := s.newProvisionerTask(c, true, s.APIConn.Environ) 898 defer stop(c, task) 899 900 // Initially create a machine, and an unknown instance, with safe mode on. 901 m0, err := s.addMachine() 902 c.Assert(err, gc.IsNil) 903 i0 := s.checkStartInstance(c, m0) 904 i1 := s.startUnknownInstance(c, "999") 905 906 // mark the first machine as dead 907 c.Assert(m0.EnsureDead(), gc.IsNil) 908 909 // with safe mode on, only one of the machines is stopped. 910 s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1}) 911 s.waitRemoved(c, m0) 912 913 // turn off safe mode and check that the other machine is now stopped also. 914 task.SetSafeMode(false) 915 s.checkStopInstances(c, i1) 916 } 917 918 func (s *ProvisionerSuite) TestProvisionerRetriesTransientErrors(c *gc.C) { 919 s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond) 920 var e environs.Environ = &mockBroker{Environ: s.APIConn.Environ, retryCount: make(map[string]int)} 921 task := s.newProvisionerTask(c, false, e) 922 defer stop(c, task) 923 924 // Provision some machines, some will be started first time, 925 // another will require retries. 926 m1, err := s.addMachine() 927 c.Assert(err, gc.IsNil) 928 s.checkStartInstance(c, m1) 929 m2, err := s.addMachine() 930 c.Assert(err, gc.IsNil) 931 s.checkStartInstance(c, m2) 932 m3, err := s.addMachine() 933 c.Assert(err, gc.IsNil) 934 m4, err := s.addMachine() 935 c.Assert(err, gc.IsNil) 936 937 // mockBroker will fail to start machine-3 several times; 938 // keep setting the transient flag to retry until the 939 // instance has started. 940 thatsAllFolks := make(chan struct{}) 941 go func() { 942 for { 943 select { 944 case <-thatsAllFolks: 945 return 946 case <-time.After(coretesting.ShortWait): 947 err := m3.SetStatus(params.StatusError, "info", params.StatusData{"transient": true}) 948 c.Assert(err, gc.IsNil) 949 } 950 } 951 }() 952 s.checkStartInstance(c, m3) 953 close(thatsAllFolks) 954 955 // Machine 4 is never provisioned. 956 status, _, _, err := m4.Status() 957 c.Assert(err, gc.IsNil) 958 c.Assert(status, gc.Equals, params.StatusError) 959 _, err = m4.InstanceId() 960 c.Assert(err, jc.Satisfies, state.IsNotProvisionedError) 961 } 962 963 type mockBroker struct { 964 environs.Environ 965 retryCount map[string]int 966 } 967 968 func (b *mockBroker) StartInstance(args environs.StartInstanceParams) (instance.Instance, *instance.HardwareCharacteristics, []network.Info, error) { 969 // All machines except machines 3, 4 are provisioned successfully the first time. 970 // Machines 3 is provisioned after some attempts have been made. 971 // Machine 4 is never provisioned. 972 id := args.MachineConfig.MachineId 973 retries := b.retryCount[id] 974 if (id != "3" && id != "4") || retries > 2 { 975 return b.Environ.StartInstance(args) 976 } else { 977 b.retryCount[id] = retries + 1 978 } 979 return nil, nil, nil, fmt.Errorf("error: some error") 980 } 981 982 func (b *mockBroker) GetToolsSources() ([]simplestreams.DataSource, error) { 983 return b.Environ.(tools.SupportsCustomSources).GetToolsSources() 984 }