github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/provisioner/provisioner_test.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package provisioner_test 5 6 import ( 7 "fmt" 8 "strings" 9 "time" 10 11 "github.com/juju/errors" 12 "github.com/juju/names" 13 jc "github.com/juju/testing/checkers" 14 "github.com/juju/utils" 15 "github.com/juju/utils/set" 16 gc "launchpad.net/gocheck" 17 18 "github.com/juju/juju/constraints" 19 "github.com/juju/juju/environs" 20 "github.com/juju/juju/environs/config" 21 "github.com/juju/juju/environs/network" 22 "github.com/juju/juju/environs/simplestreams" 23 "github.com/juju/juju/environs/tools" 24 "github.com/juju/juju/instance" 25 "github.com/juju/juju/juju/testing" 26 "github.com/juju/juju/provider/dummy" 27 "github.com/juju/juju/state" 28 "github.com/juju/juju/state/api" 29 "github.com/juju/juju/state/api/params" 30 apiprovisioner "github.com/juju/juju/state/api/provisioner" 31 apiserverprovisioner "github.com/juju/juju/state/apiserver/provisioner" 32 coretesting "github.com/juju/juju/testing" 33 "github.com/juju/juju/worker/provisioner" 34 ) 35 36 type CommonProvisionerSuite struct { 37 testing.JujuConnSuite 38 op <-chan dummy.Operation 39 cfg *config.Config 40 // defaultConstraints are used when adding a machine and then later in test assertions. 41 defaultConstraints constraints.Value 42 43 st *api.State 44 provisioner *apiprovisioner.State 45 } 46 47 type ProvisionerSuite struct { 48 CommonProvisionerSuite 49 } 50 51 var _ = gc.Suite(&ProvisionerSuite{}) 52 53 var veryShortAttempt = utils.AttemptStrategy{ 54 Total: 1 * time.Second, 55 Delay: 80 * time.Millisecond, 56 } 57 58 func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) { 59 s.JujuConnSuite.SetUpSuite(c) 60 s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cpu-cores=1 root-disk=8G") 61 } 62 63 func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) { 64 // Disable the default state policy, because the 65 // provisioner needs to be able to test pathological 66 // scenarios where a machine exists in state with 67 // invalid environment config. 68 dummy.SetStatePolicy(nil) 69 70 s.JujuConnSuite.SetUpTest(c) 71 // Create the operations channel with more than enough space 72 // for those tests that don't listen on it. 73 op := make(chan dummy.Operation, 500) 74 dummy.Listen(op) 75 s.op = op 76 77 cfg, err := s.State.EnvironConfig() 78 c.Assert(err, gc.IsNil) 79 s.cfg = cfg 80 } 81 82 func (s *CommonProvisionerSuite) APILogin(c *gc.C, machine *state.Machine) { 83 if s.st != nil { 84 c.Assert(s.st.Close(), gc.IsNil) 85 } 86 password, err := utils.RandomPassword() 87 c.Assert(err, gc.IsNil) 88 err = machine.SetPassword(password) 89 c.Assert(err, gc.IsNil) 90 err = machine.SetProvisioned("i-fake", "fake_nonce", nil) 91 c.Assert(err, gc.IsNil) 92 s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, "fake_nonce") 93 c.Assert(s.st, gc.NotNil) 94 c.Logf("API: login as %q successful", machine.Tag()) 95 s.provisioner = s.st.Provisioner() 96 c.Assert(s.provisioner, gc.NotNil) 97 } 98 99 // breakDummyProvider changes the environment config in state in a way 100 // that causes the given environMethod of the dummy provider to return 101 // an error, which is also returned as a message to be checked. 102 func breakDummyProvider(c *gc.C, st *state.State, environMethod string) string { 103 attrs := map[string]interface{}{"broken": environMethod} 104 err := st.UpdateEnvironConfig(attrs, nil, nil) 105 c.Assert(err, gc.IsNil) 106 return fmt.Sprintf("dummy.%s is broken", environMethod) 107 } 108 109 // setupEnvironmentManager adds an environment manager machine and login to the API. 110 func (s *CommonProvisionerSuite) setupEnvironmentManager(c *gc.C) { 111 machine, err := s.State.AddMachine("quantal", state.JobManageEnviron) 112 c.Assert(err, gc.IsNil) 113 c.Assert(machine.Id(), gc.Equals, "0") 114 err = machine.SetAddresses(instance.NewAddress("0.1.2.3", instance.NetworkUnknown)) 115 c.Assert(err, gc.IsNil) 116 s.APILogin(c, machine) 117 } 118 119 // invalidateEnvironment alters the environment configuration 120 // so the Settings returned from the watcher will not pass 121 // validation. 122 func (s *CommonProvisionerSuite) invalidateEnvironment(c *gc.C) { 123 st, err := state.Open(s.StateInfo(c), state.DefaultDialOpts(), state.Policy(nil)) 124 c.Assert(err, gc.IsNil) 125 defer st.Close() 126 attrs := map[string]interface{}{"type": "unknown"} 127 err = st.UpdateEnvironConfig(attrs, nil, nil) 128 c.Assert(err, gc.IsNil) 129 } 130 131 // fixEnvironment undoes the work of invalidateEnvironment. 132 func (s *CommonProvisionerSuite) fixEnvironment(c *gc.C) error { 133 st, err := state.Open(s.StateInfo(c), state.DefaultDialOpts(), state.Policy(nil)) 134 c.Assert(err, gc.IsNil) 135 defer st.Close() 136 attrs := map[string]interface{}{"type": s.cfg.AllAttrs()["type"]} 137 return st.UpdateEnvironConfig(attrs, nil, nil) 138 } 139 140 // stopper is stoppable. 141 type stopper interface { 142 Stop() error 143 } 144 145 // stop stops a stopper. 146 func stop(c *gc.C, s stopper) { 147 c.Assert(s.Stop(), gc.IsNil) 148 } 149 150 func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instance.Instance { 151 instance, _ := testing.AssertStartInstance(c, s.Conn.Environ, id) 152 select { 153 case o := <-s.op: 154 switch o := o.(type) { 155 case dummy.OpStartInstance: 156 default: 157 c.Fatalf("unexpected operation %#v", o) 158 } 159 case <-time.After(coretesting.LongWait): 160 c.Fatalf("timed out waiting for startinstance operation") 161 } 162 return instance 163 } 164 165 func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instance.Instance { 166 return s.checkStartInstanceCustom(c, m, "pork", s.defaultConstraints, nil, nil, true) 167 } 168 169 func (s *CommonProvisionerSuite) checkStartInstanceCustom(c *gc.C, m *state.Machine, secret string, cons constraints.Value, networks []string, networkInfo []network.Info, waitInstanceId bool) (inst instance.Instance) { 170 s.BackingState.StartSync() 171 for { 172 select { 173 case o := <-s.op: 174 switch o := o.(type) { 175 case dummy.OpStartInstance: 176 inst = o.Instance 177 if waitInstanceId { 178 s.waitInstanceId(c, m, inst.Id()) 179 } 180 181 // Check the instance was started with the expected params. 182 c.Assert(o.MachineId, gc.Equals, m.Id()) 183 nonceParts := strings.SplitN(o.MachineNonce, ":", 2) 184 c.Assert(nonceParts, gc.HasLen, 2) 185 c.Assert(nonceParts[0], gc.Equals, names.MachineTag("0")) 186 c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString) 187 c.Assert(o.Secret, gc.Equals, secret) 188 c.Assert(o.Networks, jc.DeepEquals, networks) 189 c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo) 190 191 // All provisioned machines in this test suite have 192 // their hardware characteristics attributes set to 193 // the same values as the constraints due to the dummy 194 // environment being used. 195 if !constraints.IsEmpty(&cons) { 196 c.Assert(o.Constraints, gc.DeepEquals, cons) 197 hc, err := m.HardwareCharacteristics() 198 c.Assert(err, gc.IsNil) 199 c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{ 200 Arch: cons.Arch, 201 Mem: cons.Mem, 202 RootDisk: cons.RootDisk, 203 CpuCores: cons.CpuCores, 204 CpuPower: cons.CpuPower, 205 Tags: cons.Tags, 206 }) 207 } 208 return 209 default: 210 c.Logf("ignoring unexpected operation %#v", o) 211 } 212 case <-time.After(2 * time.Second): 213 c.Fatalf("provisioner did not start an instance") 214 return 215 } 216 } 217 return 218 } 219 220 // checkNoOperations checks that the environ was not operated upon. 221 func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) { 222 s.BackingState.StartSync() 223 select { 224 case o := <-s.op: 225 c.Fatalf("unexpected operation %#v", o) 226 case <-time.After(coretesting.ShortWait): 227 return 228 } 229 } 230 231 // checkStopInstances checks that an instance has been stopped. 232 func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instance.Instance) { 233 s.checkStopSomeInstances(c, instances, nil) 234 } 235 236 // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not. 237 func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C, 238 instancesToStop []instance.Instance, instancesToKeep []instance.Instance) { 239 240 s.BackingState.StartSync() 241 instanceIdsToStop := set.NewStrings() 242 for _, instance := range instancesToStop { 243 instanceIdsToStop.Add(string(instance.Id())) 244 } 245 instanceIdsToKeep := set.NewStrings() 246 for _, instance := range instancesToKeep { 247 instanceIdsToKeep.Add(string(instance.Id())) 248 } 249 // Continue checking for stop instance calls until all the instances we 250 // are waiting on to finish, actually finish, or we time out. 251 for !instanceIdsToStop.IsEmpty() { 252 select { 253 case o := <-s.op: 254 switch o := o.(type) { 255 case dummy.OpStopInstances: 256 for _, id := range o.Ids { 257 instId := string(id) 258 instanceIdsToStop.Remove(instId) 259 if instanceIdsToKeep.Contains(instId) { 260 c.Errorf("provisioner unexpectedly stopped instance %s", instId) 261 } 262 } 263 default: 264 c.Fatalf("unexpected operation %#v", o) 265 return 266 } 267 case <-time.After(2 * time.Second): 268 c.Fatalf("provisioner did not stop an instance") 269 return 270 } 271 } 272 } 273 274 func (s *CommonProvisionerSuite) waitMachine(c *gc.C, m *state.Machine, check func() bool) { 275 // TODO(jam): We need to grow a new method on NotifyWatcherC 276 // that calls StartSync while waiting for changes, then 277 // waitMachine and waitHardwareCharacteristics can use that 278 // instead 279 w := m.Watch() 280 defer stop(c, w) 281 timeout := time.After(coretesting.LongWait) 282 resync := time.After(0) 283 for { 284 select { 285 case <-w.Changes(): 286 if check() { 287 return 288 } 289 case <-resync: 290 resync = time.After(coretesting.ShortWait) 291 s.BackingState.StartSync() 292 case <-timeout: 293 c.Fatalf("machine %v wait timed out", m) 294 } 295 } 296 } 297 298 func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) { 299 w := m.WatchHardwareCharacteristics() 300 defer stop(c, w) 301 timeout := time.After(coretesting.LongWait) 302 resync := time.After(0) 303 for { 304 select { 305 case <-w.Changes(): 306 if check() { 307 return 308 } 309 case <-resync: 310 resync = time.After(coretesting.ShortWait) 311 s.BackingState.StartSync() 312 case <-timeout: 313 c.Fatalf("hardware characteristics for machine %v wait timed out", m) 314 } 315 } 316 } 317 318 // waitRemoved waits for the supplied machine to be removed from state. 319 func (s *CommonProvisionerSuite) waitRemoved(c *gc.C, m *state.Machine) { 320 s.waitMachine(c, m, func() bool { 321 err := m.Refresh() 322 if errors.IsNotFound(err) { 323 return true 324 } 325 c.Assert(err, gc.IsNil) 326 c.Logf("machine %v is still %s", m, m.Life()) 327 return false 328 }) 329 } 330 331 // waitInstanceId waits until the supplied machine has an instance id, then 332 // asserts it is as expected. 333 func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) { 334 s.waitHardwareCharacteristics(c, m, func() bool { 335 if actual, err := m.InstanceId(); err == nil { 336 c.Assert(actual, gc.Equals, expect) 337 return true 338 } else if !state.IsNotProvisionedError(err) { 339 // We don't expect any errors. 340 panic(err) 341 } 342 c.Logf("machine %v is still unprovisioned", m) 343 return false 344 }) 345 } 346 347 func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner { 348 machineTag := "machine-0" 349 agentConfig := s.AgentConfigForTag(c, machineTag) 350 return provisioner.NewEnvironProvisioner(s.provisioner, agentConfig) 351 } 352 353 func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) { 354 return s.addMachineWithRequestedNetworks(nil, s.defaultConstraints) 355 } 356 357 func (s *CommonProvisionerSuite) addMachineWithRequestedNetworks(networks []string, cons constraints.Value) (*state.Machine, error) { 358 return s.BackingState.AddOneMachine(state.MachineTemplate{ 359 Series: coretesting.FakeDefaultSeries, 360 Jobs: []state.MachineJob{state.JobHostUnits}, 361 Constraints: cons, 362 RequestedNetworks: networks, 363 }) 364 } 365 366 func (s *ProvisionerSuite) SetUpTest(c *gc.C) { 367 s.CommonProvisionerSuite.SetUpTest(c) 368 s.CommonProvisionerSuite.setupEnvironmentManager(c) 369 } 370 371 func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) { 372 p := s.newEnvironProvisioner(c) 373 c.Assert(p.Stop(), gc.IsNil) 374 } 375 376 func (s *ProvisionerSuite) TestSimple(c *gc.C) { 377 p := s.newEnvironProvisioner(c) 378 defer stop(c, p) 379 380 // Check that an instance is provisioned when the machine is created... 381 m, err := s.addMachine() 382 c.Assert(err, gc.IsNil) 383 instance := s.checkStartInstance(c, m) 384 385 // ...and removed, along with the machine, when the machine is Dead. 386 c.Assert(m.EnsureDead(), gc.IsNil) 387 s.checkStopInstances(c, instance) 388 s.waitRemoved(c, m) 389 } 390 391 func (s *ProvisionerSuite) TestConstraints(c *gc.C) { 392 // Create a machine with non-standard constraints. 393 m, err := s.addMachine() 394 c.Assert(err, gc.IsNil) 395 cons := constraints.MustParse("mem=8G arch=amd64 cpu-cores=2 root-disk=10G") 396 err = m.SetConstraints(cons) 397 c.Assert(err, gc.IsNil) 398 399 // Start a provisioner and check those constraints are used. 400 p := s.newEnvironProvisioner(c) 401 defer stop(c, p) 402 s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, true) 403 } 404 405 func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) { 406 p := s.newEnvironProvisioner(c) 407 defer stop(c, p) 408 409 // Check that an instance is not provisioned when the machine is created... 410 m, err := s.BackingState.AddOneMachine(state.MachineTemplate{ 411 // We need a valid series that has no tools uploaded 412 Series: "raring", 413 Jobs: []state.MachineJob{state.JobHostUnits}, 414 Constraints: s.defaultConstraints, 415 }) 416 c.Assert(err, gc.IsNil) 417 s.checkNoOperations(c) 418 419 t0 := time.Now() 420 for time.Since(t0) < coretesting.LongWait { 421 // And check the machine status is set to error. 422 status, info, _, err := m.Status() 423 c.Assert(err, gc.IsNil) 424 if status == params.StatusPending { 425 time.Sleep(coretesting.ShortWait) 426 continue 427 } 428 c.Assert(status, gc.Equals, params.StatusError) 429 c.Assert(info, gc.Equals, "no matching tools available") 430 break 431 } 432 433 // Restart the PA to make sure the machine is skipped again. 434 stop(c, p) 435 p = s.newEnvironProvisioner(c) 436 defer stop(c, p) 437 s.checkNoOperations(c) 438 } 439 440 func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenStartInstanceFailed(c *gc.C) { 441 brokenMsg := breakDummyProvider(c, s.State, "StartInstance") 442 p := s.newEnvironProvisioner(c) 443 defer stop(c, p) 444 445 // Check that an instance is not provisioned when the machine is created... 446 m, err := s.addMachine() 447 c.Assert(err, gc.IsNil) 448 s.checkNoOperations(c) 449 450 t0 := time.Now() 451 for time.Since(t0) < coretesting.LongWait { 452 // And check the machine status is set to error. 453 status, info, _, err := m.Status() 454 c.Assert(err, gc.IsNil) 455 if status == params.StatusPending { 456 time.Sleep(coretesting.ShortWait) 457 continue 458 } 459 c.Assert(status, gc.Equals, params.StatusError) 460 c.Assert(info, gc.Equals, brokenMsg) 461 break 462 } 463 464 // Unbreak the environ config. 465 err = s.fixEnvironment(c) 466 c.Assert(err, gc.IsNil) 467 468 // Restart the PA to make sure the machine is skipped again. 469 stop(c, p) 470 p = s.newEnvironProvisioner(c) 471 defer stop(c, p) 472 s.checkNoOperations(c) 473 } 474 475 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForContainers(c *gc.C) { 476 p := s.newEnvironProvisioner(c) 477 defer stop(c, p) 478 479 // create a machine to host the container. 480 m, err := s.addMachine() 481 c.Assert(err, gc.IsNil) 482 inst := s.checkStartInstance(c, m) 483 484 // make a container on the machine we just created 485 template := state.MachineTemplate{ 486 Series: coretesting.FakeDefaultSeries, 487 Jobs: []state.MachineJob{state.JobHostUnits}, 488 } 489 container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXC) 490 c.Assert(err, gc.IsNil) 491 492 // the PA should not attempt to create it 493 s.checkNoOperations(c) 494 495 // cleanup 496 c.Assert(container.EnsureDead(), gc.IsNil) 497 c.Assert(container.Remove(), gc.IsNil) 498 c.Assert(m.EnsureDead(), gc.IsNil) 499 s.checkStopInstances(c, inst) 500 s.waitRemoved(c, m) 501 } 502 503 func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedNetworks(c *gc.C) { 504 p := s.newEnvironProvisioner(c) 505 defer stop(c, p) 506 507 // Add and provision a machine with networks specified. 508 requestedNetworks := []string{"net1", "net2"} 509 cons := constraints.MustParse(s.defaultConstraints.String(), "networks=^net3,^net4") 510 expectNetworkInfo := []network.Info{{ 511 MACAddress: "aa:bb:cc:dd:ee:f0", 512 InterfaceName: "eth0", 513 ProviderId: "net1", 514 NetworkName: "net1", 515 VLANTag: 0, 516 CIDR: "0.1.2.0/24", 517 IsVirtual: false, 518 }, { 519 MACAddress: "aa:bb:cc:dd:ee:f1", 520 InterfaceName: "eth1", 521 ProviderId: "net2", 522 NetworkName: "net2", 523 VLANTag: 1, 524 CIDR: "0.2.2.0/24", 525 IsVirtual: true, 526 }} 527 m, err := s.addMachineWithRequestedNetworks(requestedNetworks, cons) 528 c.Assert(err, gc.IsNil) 529 inst := s.checkStartInstanceCustom( 530 c, m, "pork", cons, 531 requestedNetworks, 532 expectNetworkInfo, true) 533 534 _, err = s.State.Network("net1") 535 c.Assert(err, gc.IsNil) 536 _, err = s.State.Network("net2") 537 c.Assert(err, gc.IsNil) 538 _, err = s.State.Network("net3") 539 c.Assert(err, jc.Satisfies, errors.IsNotFound) 540 _, err = s.State.Network("net4") 541 c.Assert(err, jc.Satisfies, errors.IsNotFound) 542 ifaces, err := m.NetworkInterfaces() 543 c.Assert(err, gc.IsNil) 544 c.Assert(ifaces, gc.HasLen, 2) 545 546 // Cleanup. 547 c.Assert(m.EnsureDead(), gc.IsNil) 548 s.checkStopInstances(c, inst) 549 s.waitRemoved(c, m) 550 } 551 552 func (s *ProvisionerSuite) TestSetInstanceInfoFailureSetsErrorStatusAndStopsInstanceButKeepsGoing(c *gc.C) { 553 p := s.newEnvironProvisioner(c) 554 defer stop(c, p) 555 556 // Add and provision a machine with networks specified. 557 networks := []string{"bad-net1"} 558 // "bad-" prefix for networks causes dummy provider to report 559 // invalid network.Info. 560 expectNetworkInfo := []network.Info{ 561 {ProviderId: "bad-net1", NetworkName: "bad-net1", CIDR: "invalid"}, 562 } 563 m, err := s.addMachineWithRequestedNetworks(networks, constraints.Value{}) 564 c.Assert(err, gc.IsNil) 565 inst := s.checkStartInstanceCustom( 566 c, m, "pork", constraints.Value{}, 567 networks, expectNetworkInfo, false) 568 569 // Ensure machine error status was set. 570 t0 := time.Now() 571 for time.Since(t0) < coretesting.LongWait { 572 // And check the machine status is set to error. 573 status, info, _, err := m.Status() 574 c.Assert(err, gc.IsNil) 575 if status == params.StatusPending { 576 time.Sleep(coretesting.ShortWait) 577 continue 578 } 579 c.Assert(status, gc.Equals, params.StatusError) 580 c.Assert(info, gc.Matches, `aborted instance "dummyenv-0": cannot add network "bad-net1": invalid CIDR address: invalid`) 581 break 582 } 583 s.checkStopInstances(c, inst) 584 585 // Make sure the task didn't stop with an error 586 died := make(chan error) 587 go func() { 588 died <- p.Wait() 589 }() 590 select { 591 case <-time.After(coretesting.LongWait): 592 case err = <-died: 593 c.Fatalf("provisioner task died unexpectedly with err: %v", err) 594 } 595 596 // Restart the PA to make sure the machine is not retried. 597 stop(c, p) 598 p = s.newEnvironProvisioner(c) 599 defer stop(c, p) 600 601 s.checkNoOperations(c) 602 } 603 604 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurWithAnInvalidEnvironment(c *gc.C) { 605 s.invalidateEnvironment(c) 606 607 p := s.newEnvironProvisioner(c) 608 defer stop(c, p) 609 610 // try to create a machine 611 _, err := s.addMachine() 612 c.Assert(err, gc.IsNil) 613 614 // the PA should not create it 615 s.checkNoOperations(c) 616 } 617 618 func (s *ProvisionerSuite) TestProvisioningOccursWithFixedEnvironment(c *gc.C) { 619 s.invalidateEnvironment(c) 620 621 p := s.newEnvironProvisioner(c) 622 defer stop(c, p) 623 624 // try to create a machine 625 m, err := s.addMachine() 626 c.Assert(err, gc.IsNil) 627 628 // the PA should not create it 629 s.checkNoOperations(c) 630 631 err = s.fixEnvironment(c) 632 c.Assert(err, gc.IsNil) 633 634 s.checkStartInstance(c, m) 635 } 636 637 func (s *ProvisionerSuite) TestProvisioningDoesOccurAfterInvalidEnvironmentPublished(c *gc.C) { 638 p := s.newEnvironProvisioner(c) 639 defer stop(c, p) 640 641 // place a new machine into the state 642 m, err := s.addMachine() 643 c.Assert(err, gc.IsNil) 644 645 s.checkStartInstance(c, m) 646 647 s.invalidateEnvironment(c) 648 649 // create a second machine 650 m, err = s.addMachine() 651 c.Assert(err, gc.IsNil) 652 653 // the PA should create it using the old environment 654 s.checkStartInstance(c, m) 655 } 656 657 func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) { 658 p := s.newEnvironProvisioner(c) 659 defer stop(c, p) 660 661 // create a machine 662 m, err := s.addMachine() 663 c.Assert(err, gc.IsNil) 664 s.checkStartInstance(c, m) 665 666 // restart the PA 667 stop(c, p) 668 p = s.newEnvironProvisioner(c) 669 defer stop(c, p) 670 671 // check that there is only one machine provisioned. 672 machines, err := s.State.AllMachines() 673 c.Assert(err, gc.IsNil) 674 c.Check(len(machines), gc.Equals, 2) 675 c.Check(machines[0].Id(), gc.Equals, "0") 676 c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse) 677 678 // the PA should not create it a second time 679 s.checkNoOperations(c) 680 } 681 682 func (s *ProvisionerSuite) TestProvisioningStopsInstances(c *gc.C) { 683 p := s.newEnvironProvisioner(c) 684 defer stop(c, p) 685 686 // create a machine 687 m0, err := s.addMachine() 688 c.Assert(err, gc.IsNil) 689 i0 := s.checkStartInstance(c, m0) 690 691 // create a second machine 692 m1, err := s.addMachine() 693 c.Assert(err, gc.IsNil) 694 i1 := s.checkStartInstance(c, m1) 695 stop(c, p) 696 697 // mark the first machine as dead 698 c.Assert(m0.EnsureDead(), gc.IsNil) 699 700 // remove the second machine entirely 701 c.Assert(m1.EnsureDead(), gc.IsNil) 702 c.Assert(m1.Remove(), gc.IsNil) 703 704 // start a new provisioner to shut them both down 705 p = s.newEnvironProvisioner(c) 706 defer stop(c, p) 707 s.checkStopInstances(c, i0, i1) 708 s.waitRemoved(c, m0) 709 } 710 711 func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) { 712 p := s.newEnvironProvisioner(c) 713 defer stop(c, p) 714 715 // provision a machine 716 m0, err := s.addMachine() 717 c.Assert(err, gc.IsNil) 718 s.checkStartInstance(c, m0) 719 720 // stop the provisioner and make the machine dying 721 stop(c, p) 722 err = m0.Destroy() 723 c.Assert(err, gc.IsNil) 724 725 // add a new, dying, unprovisioned machine 726 m1, err := s.addMachine() 727 c.Assert(err, gc.IsNil) 728 err = m1.Destroy() 729 c.Assert(err, gc.IsNil) 730 731 // start the provisioner and wait for it to reap the useless machine 732 p = s.newEnvironProvisioner(c) 733 defer stop(c, p) 734 s.checkNoOperations(c) 735 s.waitRemoved(c, m1) 736 737 // verify the other one's still fine 738 err = m0.Refresh() 739 c.Assert(err, gc.IsNil) 740 c.Assert(m0.Life(), gc.Equals, state.Dying) 741 } 742 743 func (s *ProvisionerSuite) TestProvisioningRecoversAfterInvalidEnvironmentPublished(c *gc.C) { 744 p := s.newEnvironProvisioner(c) 745 defer stop(c, p) 746 747 // place a new machine into the state 748 m, err := s.addMachine() 749 c.Assert(err, gc.IsNil) 750 s.checkStartInstance(c, m) 751 752 s.invalidateEnvironment(c) 753 s.BackingState.StartSync() 754 755 // create a second machine 756 m, err = s.addMachine() 757 c.Assert(err, gc.IsNil) 758 759 // the PA should create it using the old environment 760 s.checkStartInstance(c, m) 761 762 err = s.fixEnvironment(c) 763 c.Assert(err, gc.IsNil) 764 765 // insert our observer 766 cfgObserver := make(chan *config.Config, 1) 767 provisioner.SetObserver(p, cfgObserver) 768 769 err = s.State.UpdateEnvironConfig(map[string]interface{}{"secret": "beef"}, nil, nil) 770 c.Assert(err, gc.IsNil) 771 772 s.BackingState.StartSync() 773 774 // wait for the PA to load the new configuration 775 select { 776 case <-cfgObserver: 777 case <-time.After(coretesting.LongWait): 778 c.Fatalf("PA did not action config change") 779 } 780 781 // create a third machine 782 m, err = s.addMachine() 783 c.Assert(err, gc.IsNil) 784 785 // the PA should create it using the new environment 786 s.checkStartInstanceCustom(c, m, "beef", s.defaultConstraints, nil, nil, true) 787 } 788 789 func (s *ProvisionerSuite) TestProvisioningSafeMode(c *gc.C) { 790 p := s.newEnvironProvisioner(c) 791 defer stop(c, p) 792 793 // create a machine 794 m0, err := s.addMachine() 795 c.Assert(err, gc.IsNil) 796 i0 := s.checkStartInstance(c, m0) 797 798 // create a second machine 799 m1, err := s.addMachine() 800 c.Assert(err, gc.IsNil) 801 i1 := s.checkStartInstance(c, m1) 802 stop(c, p) 803 804 // mark the first machine as dead 805 c.Assert(m0.EnsureDead(), gc.IsNil) 806 807 // remove the second machine entirely from state 808 c.Assert(m1.EnsureDead(), gc.IsNil) 809 c.Assert(m1.Remove(), gc.IsNil) 810 811 // turn on safe mode 812 attrs := map[string]interface{}{"provisioner-safe-mode": true} 813 err = s.State.UpdateEnvironConfig(attrs, nil, nil) 814 c.Assert(err, gc.IsNil) 815 816 // start a new provisioner to shut down only the machine still in state. 817 p = s.newEnvironProvisioner(c) 818 defer stop(c, p) 819 s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1}) 820 s.waitRemoved(c, m0) 821 } 822 823 func (s *ProvisionerSuite) TestProvisioningSafeModeChange(c *gc.C) { 824 p := s.newEnvironProvisioner(c) 825 defer stop(c, p) 826 827 // First check that safe mode is initially off. 828 829 // create a machine 830 m0, err := s.addMachine() 831 c.Assert(err, gc.IsNil) 832 i0 := s.checkStartInstance(c, m0) 833 834 // create a second machine 835 m1, err := s.addMachine() 836 c.Assert(err, gc.IsNil) 837 i1 := s.checkStartInstance(c, m1) 838 839 // mark the first machine as dead 840 c.Assert(m0.EnsureDead(), gc.IsNil) 841 842 // remove the second machine entirely from state 843 c.Assert(m1.EnsureDead(), gc.IsNil) 844 c.Assert(m1.Remove(), gc.IsNil) 845 846 s.checkStopInstances(c, i0, i1) 847 s.waitRemoved(c, m0) 848 849 // insert our observer 850 cfgObserver := make(chan *config.Config, 1) 851 provisioner.SetObserver(p, cfgObserver) 852 853 // turn on safe mode 854 attrs := map[string]interface{}{"provisioner-safe-mode": true} 855 err = s.State.UpdateEnvironConfig(attrs, nil, nil) 856 c.Assert(err, gc.IsNil) 857 858 s.BackingState.StartSync() 859 860 // wait for the PA to load the new configuration 861 select { 862 case <-cfgObserver: 863 case <-time.After(coretesting.LongWait): 864 c.Fatalf("PA did not action config change") 865 } 866 867 // Now check that the provisioner has noticed safe mode is on. 868 869 // create a machine 870 m3, err := s.addMachine() 871 c.Assert(err, gc.IsNil) 872 i3 := s.checkStartInstance(c, m3) 873 874 // create an instance out of band 875 i4 := s.startUnknownInstance(c, "999") 876 877 // mark the machine as dead 878 c.Assert(m3.EnsureDead(), gc.IsNil) 879 880 // check the machine's instance is stopped, and the other isn't 881 s.checkStopSomeInstances(c, []instance.Instance{i3}, []instance.Instance{i4}) 882 s.waitRemoved(c, m3) 883 } 884 885 func (s *ProvisionerSuite) newProvisionerTask(c *gc.C, safeMode bool, broker environs.InstanceBroker) provisioner.ProvisionerTask { 886 machineWatcher, err := s.provisioner.WatchEnvironMachines() 887 c.Assert(err, gc.IsNil) 888 retryWatcher, err := s.provisioner.WatchMachineErrorRetry() 889 c.Assert(err, gc.IsNil) 890 auth, err := environs.NewAPIAuthenticator(s.provisioner) 891 c.Assert(err, gc.IsNil) 892 return provisioner.NewProvisionerTask( 893 "machine-0", safeMode, s.provisioner, 894 machineWatcher, retryWatcher, broker, auth) 895 } 896 897 func (s *ProvisionerSuite) TestTurningOffSafeModeReapsUnknownInstances(c *gc.C) { 898 task := s.newProvisionerTask(c, true, s.APIConn.Environ) 899 defer stop(c, task) 900 901 // Initially create a machine, and an unknown instance, with safe mode on. 902 m0, err := s.addMachine() 903 c.Assert(err, gc.IsNil) 904 i0 := s.checkStartInstance(c, m0) 905 i1 := s.startUnknownInstance(c, "999") 906 907 // mark the first machine as dead 908 c.Assert(m0.EnsureDead(), gc.IsNil) 909 910 // with safe mode on, only one of the machines is stopped. 911 s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1}) 912 s.waitRemoved(c, m0) 913 914 // turn off safe mode and check that the other machine is now stopped also. 915 task.SetSafeMode(false) 916 s.checkStopInstances(c, i1) 917 } 918 919 func (s *ProvisionerSuite) TestProvisionerRetriesTransientErrors(c *gc.C) { 920 s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond) 921 var e environs.Environ = &mockBroker{Environ: s.APIConn.Environ, retryCount: make(map[string]int)} 922 task := s.newProvisionerTask(c, false, e) 923 defer stop(c, task) 924 925 // Provision some machines, some will be started first time, 926 // another will require retries. 927 m1, err := s.addMachine() 928 c.Assert(err, gc.IsNil) 929 s.checkStartInstance(c, m1) 930 m2, err := s.addMachine() 931 c.Assert(err, gc.IsNil) 932 s.checkStartInstance(c, m2) 933 m3, err := s.addMachine() 934 c.Assert(err, gc.IsNil) 935 m4, err := s.addMachine() 936 c.Assert(err, gc.IsNil) 937 938 // mockBroker will fail to start machine-3 several times; 939 // keep setting the transient flag to retry until the 940 // instance has started. 941 thatsAllFolks := make(chan struct{}) 942 go func() { 943 for { 944 select { 945 case <-thatsAllFolks: 946 return 947 case <-time.After(coretesting.ShortWait): 948 err := m3.SetStatus(params.StatusError, "info", params.StatusData{"transient": true}) 949 c.Assert(err, gc.IsNil) 950 } 951 } 952 }() 953 s.checkStartInstance(c, m3) 954 close(thatsAllFolks) 955 956 // Machine 4 is never provisioned. 957 status, _, _, err := m4.Status() 958 c.Assert(err, gc.IsNil) 959 c.Assert(status, gc.Equals, params.StatusError) 960 _, err = m4.InstanceId() 961 c.Assert(err, jc.Satisfies, state.IsNotProvisionedError) 962 } 963 964 type mockBroker struct { 965 environs.Environ 966 retryCount map[string]int 967 } 968 969 func (b *mockBroker) StartInstance(args environs.StartInstanceParams) (instance.Instance, *instance.HardwareCharacteristics, []network.Info, error) { 970 // All machines except machines 3, 4 are provisioned successfully the first time. 971 // Machines 3 is provisioned after some attempts have been made. 972 // Machine 4 is never provisioned. 973 id := args.MachineConfig.MachineId 974 retries := b.retryCount[id] 975 if (id != "3" && id != "4") || retries > 2 { 976 return b.Environ.StartInstance(args) 977 } else { 978 b.retryCount[id] = retries + 1 979 } 980 return nil, nil, nil, fmt.Errorf("error: some error") 981 } 982 983 func (b *mockBroker) GetToolsSources() ([]simplestreams.DataSource, error) { 984 return b.Environ.(tools.SupportsCustomSources).GetToolsSources() 985 }