github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/provisioner/provisioner_test.go 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package provisioner_test 5 6 import ( 7 stdcontext "context" 8 "fmt" 9 "strings" 10 "sync" 11 "time" 12 13 "github.com/juju/collections/set" 14 "github.com/juju/errors" 15 "github.com/juju/loggo" 16 "github.com/juju/names/v5" 17 jc "github.com/juju/testing/checkers" 18 "github.com/juju/utils/v3" 19 "github.com/juju/version/v2" 20 "github.com/juju/worker/v3" 21 "github.com/juju/worker/v3/workertest" 22 gc "gopkg.in/check.v1" 23 24 "github.com/juju/juju/agent" 25 "github.com/juju/juju/api" 26 apiprovisioner "github.com/juju/juju/api/agent/provisioner" 27 apiserverprovisioner "github.com/juju/juju/apiserver/facades/agent/provisioner" 28 "github.com/juju/juju/controller/authentication" 29 "github.com/juju/juju/core/arch" 30 "github.com/juju/juju/core/constraints" 31 "github.com/juju/juju/core/instance" 32 "github.com/juju/juju/core/life" 33 "github.com/juju/juju/core/model" 34 corenetwork "github.com/juju/juju/core/network" 35 coreos "github.com/juju/juju/core/os" 36 "github.com/juju/juju/core/os/ostype" 37 "github.com/juju/juju/core/status" 38 "github.com/juju/juju/environs" 39 "github.com/juju/juju/environs/config" 40 "github.com/juju/juju/environs/context" 41 "github.com/juju/juju/environs/filestorage" 42 "github.com/juju/juju/environs/imagemetadata" 43 imagetesting "github.com/juju/juju/environs/imagemetadata/testing" 44 "github.com/juju/juju/environs/instances" 45 "github.com/juju/juju/environs/simplestreams" 46 sstesting "github.com/juju/juju/environs/simplestreams/testing" 47 envtesting "github.com/juju/juju/environs/testing" 48 "github.com/juju/juju/environs/tools" 49 "github.com/juju/juju/juju/testing" 50 providercommon "github.com/juju/juju/provider/common" 51 "github.com/juju/juju/provider/dummy" 52 "github.com/juju/juju/rpc/params" 53 "github.com/juju/juju/state" 54 "github.com/juju/juju/state/cloudimagemetadata" 55 "github.com/juju/juju/storage" 56 "github.com/juju/juju/storage/poolmanager" 57 coretesting "github.com/juju/juju/testing" 58 coretools "github.com/juju/juju/tools" 59 "github.com/juju/juju/worker/provisioner" 60 ) 61 62 type CommonProvisionerSuite struct { 63 testing.JujuConnSuite 64 op <-chan dummy.Operation 65 cfg *config.Config 66 // defaultConstraints are used when adding a machine and then later in test assertions. 67 defaultConstraints constraints.Value 68 69 st api.Connection 70 provisioner *apiprovisioner.State 71 callCtx context.ProviderCallContext 72 } 73 74 func (s *CommonProvisionerSuite) assertProvisionerObservesConfigChanges(c *gc.C, p provisioner.Provisioner) { 75 // Inject our observer into the provisioner 76 cfgObserver := make(chan *config.Config) 77 provisioner.SetObserver(p, cfgObserver) 78 79 // Switch to reaping on All machines. 80 attrs := map[string]interface{}{ 81 config.ProvisionerHarvestModeKey: config.HarvestAll.String(), 82 } 83 err := s.Model.UpdateModelConfig(attrs, nil) 84 c.Assert(err, jc.ErrorIsNil) 85 86 // Wait for the PA to load the new configuration. We wait for the change we expect 87 // like this because sometimes we pick up the initial harvest config (destroyed) 88 // rather than the one we change to (all).
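// Track the harvest modes observed so far, so a timeout failure can report what was actually received.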
89 var received []string 90 timeout := time.After(coretesting.LongWait) 91 for { 92 select { 93 case newCfg := <-cfgObserver: 94 if newCfg.ProvisionerHarvestMode().String() == config.HarvestAll.String() { 95 return 96 } 97 received = append(received, newCfg.ProvisionerHarvestMode().String()) 98 case <-time.After(coretesting.ShortWait): 99 case <-timeout: 100 if len(received) == 0 { 101 c.Fatalf("PA did not action config change") 102 } else { 103 c.Fatalf("timed out waiting for config to change to '%s', received %+v", 104 config.HarvestAll.String(), received) 105 } 106 } 107 } 108 } 109 110 func (s *CommonProvisionerSuite) assertProvisionerObservesConfigChangesWorkerCount(c *gc.C, p provisioner.Provisioner, container bool) { 111 // Inject our observer into the provisioner 112 cfgObserver := make(chan *config.Config) 113 provisioner.SetObserver(p, cfgObserver) 114 115 // Update the number of provisioning workers. 116 attrs := map[string]interface{}{} 117 if container { 118 attrs[config.NumContainerProvisionWorkersKey] = 10 119 } else { 120 attrs[config.NumProvisionWorkersKey] = 42 121 } 122 err := s.Model.UpdateModelConfig(attrs, nil) 123 c.Assert(err, jc.ErrorIsNil) 124 125 // Wait for the PA to load the new configuration. We wait for the change we expect 126 // like this because sometimes we pick up the initial worker-count config 127 // rather than the one we change to. 128 var received []int 129 timeout := time.After(coretesting.LongWait) 130 for { 131 select { 132 case newCfg := <-cfgObserver: 133 if container { 134 if newCfg.NumContainerProvisionWorkers() == 10 { 135 return 136 } 137 received = append(received, newCfg.NumContainerProvisionWorkers()) 138 } else { 139 if newCfg.NumProvisionWorkers() == 42 { 140 return 141 } 142 received = append(received, newCfg.NumProvisionWorkers()) 143 } 144 case <-timeout: 145 if len(received) == 0 { 146 c.Fatalf("PA did not action config change") 147 } else { 148 c.Fatalf("timed out waiting for the provision worker count to change, received %+v", 149 received) 150 } 151 } 152 } 153 } 154 155 type ProvisionerSuite struct { 156 CommonProvisionerSuite 157 } 158 159 var _ = gc.Suite(&ProvisionerSuite{}) 160 161 func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) { 162 s.JujuConnSuite.SetUpSuite(c) 163 s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cores=1 root-disk=8G") 164 } 165 166 func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) { 167 s.JujuConnSuite.SetUpTest(c) 168 169 // We do not want to pull published image metadata for tests... 170 imagetesting.PatchOfficialDataSources(&s.CleanupSuite, "") 171 // We want an image to start test instances 172 err := s.State.CloudImageMetadataStorage.SaveMetadata([]cloudimagemetadata.Metadata{{ 173 MetadataAttributes: cloudimagemetadata.MetadataAttributes{ 174 Region: "region", 175 Version: "22.04", 176 Arch: "amd64", 177 VirtType: "", 178 RootStorageType: "", 179 Source: "test", 180 Stream: "released", 181 }, 182 Priority: 10, 183 ImageId: "-999", 184 }}) 185 c.Assert(err, jc.ErrorIsNil) 186 187 // Create the operations channel with more than enough space 188 // for those tests that don't listen on it. 189 op := make(chan dummy.Operation, 500) 190 dummy.Listen(op) 191 s.op = op 192 193 cfg, err := s.Model.ModelConfig() 194 c.Assert(err, jc.ErrorIsNil) 195 s.cfg = cfg 196 197 s.callCtx = context.NewEmptyCloudCallContext() 198 199 // Create a machine for the dummy bootstrap instance, 200 // so the provisioner doesn't destroy it.
201 insts, err := s.Environ.Instances(s.callCtx, []instance.Id{dummy.BootstrapInstanceId}) 202 c.Assert(err, jc.ErrorIsNil) 203 addrs, err := insts[0].Addresses(s.callCtx) 204 c.Assert(err, jc.ErrorIsNil) 205 206 pAddrs := make(corenetwork.SpaceAddresses, len(addrs)) 207 for i, addr := range addrs { 208 pAddrs[i] = corenetwork.SpaceAddress{MachineAddress: addr.MachineAddress} 209 } 210 211 machine, err := s.State.AddOneMachine(state.MachineTemplate{ 212 Addresses: pAddrs, 213 Base: state.UbuntuBase("12.10"), 214 Nonce: agent.BootstrapNonce, 215 InstanceId: dummy.BootstrapInstanceId, 216 Jobs: []state.MachineJob{state.JobManageModel}, 217 }) 218 c.Assert(err, jc.ErrorIsNil) 219 c.Assert(machine.Id(), gc.Equals, "0") 220 221 current := coretesting.CurrentVersion() 222 err = machine.SetAgentVersion(current) 223 c.Assert(err, jc.ErrorIsNil) 224 225 password, err := utils.RandomPassword() 226 c.Assert(err, jc.ErrorIsNil) 227 err = machine.SetPassword(password) 228 c.Assert(err, jc.ErrorIsNil) 229 230 s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, agent.BootstrapNonce) 231 c.Assert(s.st, gc.NotNil) 232 c.Logf("API: login as %q successful", machine.Tag()) 233 s.provisioner = apiprovisioner.NewState(s.st) 234 c.Assert(s.provisioner, gc.NotNil) 235 236 } 237 238 func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instances.Instance { 239 instance, _ := testing.AssertStartInstance(c, s.Environ, s.callCtx, s.ControllerConfig.ControllerUUID(), id) 240 select { 241 case o := <-s.op: 242 switch o := o.(type) { 243 case dummy.OpStartInstance: 244 default: 245 c.Fatalf("unexpected operation %#v", o) 246 } 247 case <-time.After(coretesting.LongWait): 248 c.Fatalf("timed out waiting for startinstance operation") 249 } 250 return instance 251 } 252 253 func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instances.Instance { 254 retVal := s.checkStartInstancesCustom(c, []*state.Machine{m}, "pork", s.defaultConstraints, 255 nil, nil, nil, nil, nil, nil, true) 256 return retVal[m.Id()] 257 } 258 259 func (s *CommonProvisionerSuite) checkStartInstanceCustom( 260 c *gc.C, m *state.Machine, 261 secret string, cons constraints.Value, 262 networkInfo corenetwork.InterfaceInfos, 263 subnetsToZones map[corenetwork.Id][]string, 264 rootDisk *storage.VolumeParams, 265 volumes []storage.Volume, 266 volumeAttachments []storage.VolumeAttachment, 267 checkPossibleTools coretools.List, 268 waitInstanceId bool, 269 ) instances.Instance { 270 retVal := s.checkStartInstancesCustom(c, []*state.Machine{m}, 271 secret, cons, networkInfo, subnetsToZones, rootDisk, volumes, 272 volumeAttachments, checkPossibleTools, waitInstanceId) 273 return retVal[m.Id()] 274 } 275 276 func (s *CommonProvisionerSuite) checkStartInstances(c *gc.C, machines []*state.Machine) map[string]instances.Instance { 277 return s.checkStartInstancesCustom(c, machines, "pork", s.defaultConstraints, nil, nil, 278 nil, nil, nil, nil, true) 279 } 280 281 // checkStartInstancesCustom takes a slice of Machines.
A 282 // map of machine Ids to instances is returned 283 func (s *CommonProvisionerSuite) checkStartInstancesCustom( 284 c *gc.C, machines []*state.Machine, 285 secret string, cons constraints.Value, 286 networkInfo corenetwork.InterfaceInfos, 287 subnetsToZones map[corenetwork.Id][]string, 288 rootDisk *storage.VolumeParams, 289 volumes []storage.Volume, 290 volumeAttachments []storage.VolumeAttachment, 291 checkPossibleTools coretools.List, 292 waitInstanceId bool, 293 ) ( 294 returnInstances map[string]instances.Instance, 295 ) { 296 returnInstances = make(map[string]instances.Instance, len(machines)) 297 found := 0 298 for { 299 select { 300 case o := <-s.op: 301 switch o := o.(type) { 302 case dummy.OpStartInstance: 303 inst := o.Instance 304 305 var m *state.Machine 306 for _, machine := range machines { 307 if machine.Id() == o.MachineId { 308 m = machine 309 found += 1 310 break 311 } 312 } 313 c.Assert(m, gc.NotNil) 314 if waitInstanceId { 315 s.waitInstanceId(c, m, inst.Id()) 316 } 317 318 // Check the instance was started with the expected params. 319 c.Assert(o.MachineId, gc.Equals, m.Id()) 320 nonceParts := strings.SplitN(o.MachineNonce, ":", 2) 321 c.Assert(nonceParts, gc.HasLen, 2) 322 c.Assert(nonceParts[0], gc.Equals, names.NewMachineTag("0").String()) 323 c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString) 324 c.Assert(o.Secret, gc.Equals, secret) 325 c.Assert(o.SubnetsToZones, jc.DeepEquals, subnetsToZones) 326 c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo) 327 c.Assert(o.RootDisk, jc.DeepEquals, rootDisk) 328 c.Assert(o.Volumes, jc.DeepEquals, volumes) 329 c.Assert(o.VolumeAttachments, jc.DeepEquals, volumeAttachments) 330 331 var jobs []model.MachineJob 332 for _, job := range m.Jobs() { 333 jobs = append(jobs, job.ToParams()) 334 } 335 c.Assert(o.Jobs, jc.SameContents, jobs) 336 337 if checkPossibleTools != nil { 338 for _, t := range o.PossibleTools { 339 url := fmt.Sprintf("https://%s/model/%s/tools/%s", 340 s.st.Addr(), coretesting.ModelTag.Id(), t.Version) 341 c.Check(t.URL, gc.Equals, url) 342 t.URL = "" 343 } 344 for _, t := range checkPossibleTools { 345 t.URL = "" 346 } 347 c.Assert(o.PossibleTools, gc.DeepEquals, checkPossibleTools) 348 } 349 350 // All provisioned machines in this test suite have 351 // their hardware characteristics attributes set to 352 // the same values as the constraints due to the dummy 353 // environment being used. 354 if !constraints.IsEmpty(&cons) { 355 c.Assert(o.Constraints, gc.DeepEquals, cons) 356 hc, err := m.HardwareCharacteristics() 357 c.Assert(err, jc.ErrorIsNil) 358 // At this point we don't care what the AvailabilityZone is, 359 // it can be a few different valid things. 360 zone := hc.AvailabilityZone 361 hc.AvailabilityZone = nil 362 c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{ 363 Arch: cons.Arch, 364 Mem: cons.Mem, 365 RootDisk: cons.RootDisk, 366 CpuCores: cons.CpuCores, 367 CpuPower: cons.CpuPower, 368 Tags: cons.Tags, 369 }) 370 hc.AvailabilityZone = zone 371 } 372 returnInstances[m.Id()] = inst 373 if found == len(machines) { 374 return 375 } 376 break 377 default: 378 c.Logf("ignoring unexpected operation %#v", o) 379 } 380 case <-time.After(2 * time.Second): 381 c.Fatalf("provisioner did not start an instance") 382 return 383 } 384 } 385 } 386 387 // checkNoOperations checks that the environ was not operated upon. 
388 func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) { 389 select { 390 case o := <-s.op: 391 c.Fatalf("unexpected operation %+v", o) 392 case <-time.After(coretesting.ShortWait): 393 return 394 } 395 } 396 397 // checkStopInstances checks that an instance has been stopped. 398 func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instances.Instance) { 399 s.checkStopSomeInstances(c, instances, nil) 400 } 401 402 // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not. 403 func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C, 404 instancesToStop []instances.Instance, instancesToKeep []instances.Instance) { 405 406 instanceIdsToStop := set.NewStrings() 407 for _, instance := range instancesToStop { 408 instanceIdsToStop.Add(string(instance.Id())) 409 } 410 instanceIdsToKeep := set.NewStrings() 411 for _, instance := range instancesToKeep { 412 instanceIdsToKeep.Add(string(instance.Id())) 413 } 414 // Continue checking for stop instance calls until all the instances we 415 // are waiting on to finish, actually finish, or we time out. 416 for !instanceIdsToStop.IsEmpty() { 417 select { 418 case o := <-s.op: 419 switch o := o.(type) { 420 case dummy.OpStopInstances: 421 for _, id := range o.Ids { 422 instId := string(id) 423 instanceIdsToStop.Remove(instId) 424 if instanceIdsToKeep.Contains(instId) { 425 c.Errorf("provisioner unexpectedly stopped instance %s", instId) 426 } 427 } 428 default: 429 c.Fatalf("unexpected operation %#v", o) 430 return 431 } 432 case <-time.After(2 * time.Second): 433 c.Fatalf("provisioner did not stop an instance") 434 return 435 } 436 } 437 } 438 439 func (s *CommonProvisionerSuite) waitForWatcher(c *gc.C, w state.NotifyWatcher, name string, check func() bool) { 440 // TODO(jam): We need to grow a new method on NotifyWatcherC 441 // that calls StartSync while waiting for changes, then 442 // waitMachine and waitHardwareCharacteristics can use that 443 // instead 444 defer workertest.CleanKill(c, w) 445 timeout := time.After(coretesting.LongWait) 446 resync := time.After(0) 447 for { 448 select { 449 case <-w.Changes(): 450 if check() { 451 return 452 } 453 case <-resync: 454 resync = time.After(coretesting.ShortWait) 455 456 case <-timeout: 457 c.Fatalf("%v wait timed out", name) 458 } 459 } 460 } 461 462 func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) { 463 w := m.WatchInstanceData() 464 name := fmt.Sprintf("hardware characteristics for machine %v", m) 465 s.waitForWatcher(c, w, name, check) 466 } 467 468 // waitForRemovalMark waits for the supplied machine to be marked for removal. 469 func (s *CommonProvisionerSuite) waitForRemovalMark(c *gc.C, m *state.Machine) { 470 w := s.BackingState.WatchMachineRemovals() 471 name := fmt.Sprintf("machine %v marked for removal", m) 472 s.waitForWatcher(c, w, name, func() bool { 473 removals, err := s.BackingState.AllMachineRemovals() 474 c.Assert(err, jc.ErrorIsNil) 475 for _, removal := range removals { 476 if removal == m.Id() { 477 return true 478 } 479 } 480 return false 481 }) 482 } 483 484 // waitInstanceId waits until the supplied machine has an instance id, then 485 // asserts it is as expected. 
486 func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) { 487 s.waitHardwareCharacteristics(c, m, func() bool { 488 if actual, err := m.InstanceId(); err == nil { 489 c.Assert(actual, gc.Equals, expect) 490 return true 491 } else if !errors.IsNotProvisioned(err) { 492 // We don't expect any errors. 493 panic(err) 494 } else { 495 c.Logf("got not provisioned error while waiting: %v", err) 496 } 497 return false 498 }) 499 } 500 501 func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner { 502 machineTag := names.NewMachineTag("0") 503 agentConfig := s.AgentConfigForTag(c, machineTag) 504 apiState := apiprovisioner.NewState(s.st) 505 w, err := provisioner.NewEnvironProvisioner(apiState, agentConfig, loggo.GetLogger("test"), s.Environ, &credentialAPIForTest{}) 506 c.Assert(err, jc.ErrorIsNil) 507 return w 508 } 509 510 func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) { 511 return s.addMachineWithConstraints(s.defaultConstraints) 512 } 513 514 func (s *CommonProvisionerSuite) addMachineWithConstraints(cons constraints.Value) (*state.Machine, error) { 515 return s.BackingState.AddOneMachine(state.MachineTemplate{ 516 Base: state.DefaultLTSBase(), 517 Jobs: []state.MachineJob{state.JobHostUnits}, 518 Constraints: cons, 519 }) 520 } 521 522 func (s *CommonProvisionerSuite) addMachines(number int) ([]*state.Machine, error) { 523 templates := make([]state.MachineTemplate, number) 524 for i := range templates { 525 templates[i] = state.MachineTemplate{ 526 Base: state.DefaultLTSBase(), 527 Jobs: []state.MachineJob{state.JobHostUnits}, 528 Constraints: s.defaultConstraints, 529 } 530 } 531 return s.BackingState.AddMachines(templates...) 532 } 533 534 func (s *CommonProvisionerSuite) enableHA(c *gc.C, n int) []*state.Machine { 535 changes, err := s.BackingState.EnableHA(n, s.defaultConstraints, state.DefaultLTSBase(), nil) 536 c.Assert(err, jc.ErrorIsNil) 537 added := make([]*state.Machine, len(changes.Added)) 538 for i, mid := range changes.Added { 539 m, err := s.BackingState.Machine(mid) 540 c.Assert(err, jc.ErrorIsNil) 541 added[i] = m 542 } 543 return added 544 } 545 546 func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) { 547 p := s.newEnvironProvisioner(c) 548 workertest.CleanKill(c, p) 549 } 550 551 func (s *ProvisionerSuite) TestSimple(c *gc.C) { 552 p := s.newEnvironProvisioner(c) 553 defer workertest.CleanKill(c, p) 554 555 // Check that an instance is provisioned when the machine is created... 556 m, err := s.addMachine() 557 c.Assert(err, jc.ErrorIsNil) 558 instance := s.checkStartInstance(c, m) 559 560 // ...and removed, along with the machine, when the machine is Dead. 561 c.Assert(m.EnsureDead(), gc.IsNil) 562 s.checkStopInstances(c, instance) 563 s.waitForRemovalMark(c, m) 564 } 565 566 func (s *ProvisionerSuite) TestConstraints(c *gc.C) { 567 // Create a machine with non-standard constraints. 568 m, err := s.addMachine() 569 c.Assert(err, jc.ErrorIsNil) 570 cons := constraints.MustParse("mem=8G arch=amd64 cores=2 root-disk=10G") 571 err = m.SetConstraints(cons) 572 c.Assert(err, jc.ErrorIsNil) 573 574 // Start a provisioner and check those constraints are used. 
575 p := s.newEnvironProvisioner(c) 576 defer workertest.CleanKill(c, p) 577 578 s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, nil, nil, nil, nil, true) 579 } 580 581 func (s *ProvisionerSuite) TestPossibleTools(c *gc.C) { 582 583 storageDir := c.MkDir() 584 s.PatchValue(&tools.DefaultBaseURL, storageDir) 585 stor, err := filestorage.NewFileStorageWriter(storageDir) 586 c.Assert(err, jc.ErrorIsNil) 587 currentVersion := version.MustParseBinary("1.2.3-ubuntu-amd64") 588 589 // The current version is determined by the current model's agent 590 // version when locating tools to provision an added unit 591 attrs := map[string]interface{}{ 592 config.AgentVersionKey: currentVersion.Number.String(), 593 } 594 err = s.Model.UpdateModelConfig(attrs, nil) 595 c.Assert(err, jc.ErrorIsNil) 596 597 s.PatchValue(&arch.HostArch, func() string { return currentVersion.Arch }) 598 s.PatchValue(&coreos.HostOS, func() ostype.OSType { return ostype.Ubuntu }) 599 600 // Upload some plausible matches, and some that should be filtered out. 601 compatibleVersion := version.MustParseBinary("1.2.3-quantal-arm64") 602 ignoreVersion1 := version.MustParseBinary("1.2.4-ubuntu-arm64") 603 ignoreVersion2 := version.MustParseBinary("1.2.3-windows-arm64") 604 availableVersions := []version.Binary{ 605 currentVersion, compatibleVersion, ignoreVersion1, ignoreVersion2, 606 } 607 envtesting.AssertUploadFakeToolsVersions(c, stor, s.cfg.AgentStream(), s.cfg.AgentStream(), availableVersions...) 608 609 // Extract the tools that we expect to actually match. 610 ss := simplestreams.NewSimpleStreams(sstesting.TestDataSourceFactory()) 611 expectedList, err := tools.FindTools(ss, s.Environ, -1, -1, []string{s.cfg.AgentStream()}, coretools.Filter{ 612 Number: currentVersion.Number, 613 OSType: "ubuntu", 614 }) 615 c.Assert(err, jc.ErrorIsNil) 616 617 // Create the machine and check the tools that get passed into StartInstance. 
618 machine, err := s.BackingState.AddOneMachine(state.MachineTemplate{ 619 Base: state.UbuntuBase("12.10"), 620 Jobs: []state.MachineJob{state.JobHostUnits}, 621 }) 622 c.Assert(err, jc.ErrorIsNil) 623 624 provisioner := s.newEnvironProvisioner(c) 625 defer workertest.CleanKill(c, provisioner) 626 s.checkStartInstanceCustom( 627 c, machine, "pork", constraints.Value{}, 628 nil, nil, nil, nil, nil, expectedList, true, 629 ) 630 } 631 632 var validCloudInitUserData = ` 633 packages: 634 - 'python-keystoneclient' 635 - 'python-glanceclient' 636 preruncmd: 637 - mkdir /tmp/preruncmd 638 - mkdir /tmp/preruncmd2 639 postruncmd: 640 - mkdir /tmp/postruncmd 641 - mkdir /tmp/postruncmd2 642 package_upgrade: false 643 `[1:] 644 645 func (s *ProvisionerSuite) TestSetUpToStartMachine(c *gc.C) { 646 attrs := map[string]interface{}{ 647 config.CloudInitUserDataKey: validCloudInitUserData, 648 } 649 650 err := s.Model.UpdateModelConfig(attrs, nil) 651 c.Assert(err, jc.ErrorIsNil) 652 653 task := s.newProvisionerTask( 654 c, 655 config.HarvestAll, 656 s.Environ, 657 s.provisioner, 658 &mockDistributionGroupFinder{}, 659 mockToolsFinder{}, 660 ) 661 defer workertest.CleanKill(c, task) 662 663 machine, err := s.addMachine() 664 c.Assert(err, jc.ErrorIsNil) 665 666 mRes, err := s.provisioner.Machines(machine.MachineTag()) 667 c.Assert(err, gc.IsNil) 668 c.Assert(mRes, gc.HasLen, 1) 669 c.Assert(mRes[0].Err, gc.IsNil) 670 apiMachine := mRes[0].Machine 671 672 pRes, err := s.provisioner.ProvisioningInfo([]names.MachineTag{machine.MachineTag()}) 673 c.Assert(err, gc.IsNil) 674 c.Assert(pRes.Results, gc.HasLen, 1) 675 676 v, err := apiMachine.ModelAgentVersion() 677 c.Assert(err, jc.ErrorIsNil) 678 679 startInstanceParams, err := provisioner.SetupToStartMachine(task, apiMachine, v, pRes.Results[0]) 680 c.Assert(err, jc.ErrorIsNil) 681 cloudInitUserData := startInstanceParams.InstanceConfig.CloudInitUserData 682 c.Assert(cloudInitUserData, gc.DeepEquals, map[string]interface{}{ 683 "packages": []interface{}{"python-keystoneclient", "python-glanceclient"}, 684 "preruncmd": []interface{}{"mkdir /tmp/preruncmd", "mkdir /tmp/preruncmd2"}, 685 "postruncmd": []interface{}{"mkdir /tmp/postruncmd", "mkdir /tmp/postruncmd2"}, 686 "package_upgrade": false}, 687 ) 688 } 689 690 func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) { 691 p := s.newEnvironProvisioner(c) 692 defer workertest.CleanKill(c, p) 693 694 // Check that an instance is not provisioned when the machine is created... 695 m, err := s.BackingState.AddOneMachine(state.MachineTemplate{ 696 // We need a valid series that has no tools uploaded 697 Base: state.Base{OS: "centos", Channel: "7"}, 698 Jobs: []state.MachineJob{state.JobHostUnits}, 699 Constraints: s.defaultConstraints, 700 }) 701 c.Assert(err, jc.ErrorIsNil) 702 s.checkNoOperations(c) 703 704 // Ensure machine error status was set, and the error matches 705 agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m) 706 c.Check(agentStatus.Status, gc.Equals, status.Error) 707 c.Check(agentStatus.Message, gc.Equals, "no matching agent binaries available") 708 c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError) 709 c.Check(instanceStatus.Message, gc.Equals, "no matching agent binaries available") 710 711 // Restart the PA to make sure the machine is skipped again. 
712 workertest.CleanKill(c, p) 713 p = s.newEnvironProvisioner(c) 714 defer workertest.CleanKill(c, p) 715 s.checkNoOperations(c) 716 } 717 718 func (s *ProvisionerSuite) waitUntilMachineNotPending(c *gc.C, m *state.Machine) (status.StatusInfo, status.StatusInfo) { 719 t0 := time.Now() 720 for time.Since(t0) < 10*coretesting.LongWait { 721 agentStatusInfo, err := m.Status() 722 c.Assert(err, jc.ErrorIsNil) 723 if agentStatusInfo.Status == status.Pending { 724 time.Sleep(coretesting.ShortWait) 725 continue 726 } 727 instanceStatusInfo, err := m.InstanceStatus() 728 c.Assert(err, jc.ErrorIsNil) 729 // officially InstanceStatus is only supposed to be Provisioning, but 730 // all current Providers have their unknown state as Pending. 731 if instanceStatusInfo.Status == status.Provisioning || 732 instanceStatusInfo.Status == status.Pending { 733 time.Sleep(coretesting.ShortWait) 734 continue 735 } 736 return agentStatusInfo, instanceStatusInfo 737 } 738 c.Fatalf("machine %q stayed in pending", m.Id()) 739 // Satisfy Go, Fatal should be a panic anyway 740 return status.StatusInfo{}, status.StatusInfo{} 741 } 742 743 func (s *ProvisionerSuite) TestProvisionerFailedStartInstanceWithInjectedCreationError(c *gc.C) { 744 // Set the retry delay to 0, and retry count to 2 to keep tests short 745 s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second) 746 s.PatchValue(provisioner.RetryStrategyCount, 2) 747 748 // create the error injection channel 749 errorInjectionChannel := make(chan error, 3) 750 751 p := s.newEnvironProvisioner(c) 752 defer workertest.CleanKill(c, p) 753 754 // patch the dummy provider error injection channel 755 cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) 756 defer cleanup() 757 758 retryableError := environs.ZoneIndependentError( 759 errors.New("container failed to start and was destroyed"), 760 ) 761 destroyError := environs.ZoneIndependentError( 762 errors.New("container failed to start and failed to destroy: manual cleanup of containers needed"), 763 ) 764 // send the error message three times, because the provisioner will retry twice as patched above. 
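// The first two injected errors are consumed by the retries; the final destroy error is the one that should surface in the machine's error status below.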
765 errorInjectionChannel <- retryableError 766 errorInjectionChannel <- retryableError 767 errorInjectionChannel <- destroyError 768 769 m, err := s.addMachine() 770 c.Assert(err, jc.ErrorIsNil) 771 s.checkNoOperations(c) 772 773 agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m) 774 // check that the status matches the error message 775 c.Check(agentStatus.Status, gc.Equals, status.Error) 776 c.Check(agentStatus.Message, gc.Equals, destroyError.Error()) 777 c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError) 778 c.Check(instanceStatus.Message, gc.Equals, destroyError.Error()) 779 } 780 781 func (s *ProvisionerSuite) TestProvisionerSucceedStartInstanceWithInjectedRetryableCreationError(c *gc.C) { 782 // Set the retry delay to 0, and retry count to 2 to keep tests short 783 s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second) 784 s.PatchValue(provisioner.RetryStrategyCount, 2) 785 786 // create the error injection channel 787 errorInjectionChannel := make(chan error, 1) 788 c.Assert(errorInjectionChannel, gc.NotNil) 789 790 p := s.newEnvironProvisioner(c) 791 defer workertest.CleanKill(c, p) 792 793 // patch the dummy provider error injection channel 794 cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) 795 defer cleanup() 796 797 // send the error message once 798 // - instance creation should succeed 799 retryableError := errors.New("container failed to start and was destroyed") 800 errorInjectionChannel <- retryableError 801 802 m, err := s.addMachine() 803 c.Assert(err, jc.ErrorIsNil) 804 s.checkStartInstance(c, m) 805 } 806 807 func (s *ProvisionerSuite) TestProvisionerStopRetryingIfDying(c *gc.C) { 808 // Create the error injection channel and inject 809 // a retryable error 810 errorInjectionChannel := make(chan error, 1) 811 812 p := s.newEnvironProvisioner(c) 813 // Don't defer the stop. We will manually stop and verify the result. 814 815 // patch the dummy provider error injection channel 816 cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel) 817 defer cleanup() 818 819 retryableError := errors.New("container failed to start and was destroyed") 820 errorInjectionChannel <- retryableError 821 822 m, err := s.addMachine() 823 c.Assert(err, jc.ErrorIsNil) 824 825 time.Sleep(coretesting.ShortWait) 826 827 workertest.CleanKill(c, p) 828 statusInfo, err := m.Status() 829 c.Assert(err, jc.ErrorIsNil) 830 c.Check(statusInfo.Status, gc.Equals, status.Pending) 831 statusInfo, err = m.InstanceStatus() 832 c.Assert(err, jc.ErrorIsNil) 833 if statusInfo.Status != status.Pending && statusInfo.Status != status.Provisioning { 834 c.Errorf("statusInfo.Status was %q not one of %q or %q", 835 statusInfo.Status, status.Pending, status.Provisioning) 836 } 837 s.checkNoOperations(c) 838 } 839 840 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForLXD(c *gc.C) { 841 p := s.newEnvironProvisioner(c) 842 defer workertest.CleanKill(c, p) 843 844 // create a machine to host the container.
845 m, err := s.addMachine() 846 c.Assert(err, jc.ErrorIsNil) 847 inst := s.checkStartInstance(c, m) 848 849 // make a container on the machine we just created 850 template := state.MachineTemplate{ 851 Base: state.DefaultLTSBase(), 852 Jobs: []state.MachineJob{state.JobHostUnits}, 853 } 854 container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXD) 855 c.Assert(err, jc.ErrorIsNil) 856 857 // the PA should not attempt to create it 858 s.checkNoOperations(c) 859 860 // cleanup 861 c.Assert(container.EnsureDead(), gc.IsNil) 862 c.Assert(container.Remove(), gc.IsNil) 863 c.Assert(m.EnsureDead(), gc.IsNil) 864 s.checkStopInstances(c, inst) 865 s.waitForRemovalMark(c, m) 866 } 867 868 func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForKVM(c *gc.C) { 869 p := s.newEnvironProvisioner(c) 870 defer workertest.CleanKill(c, p) 871 872 // create a machine to host the container. 873 m, err := s.addMachine() 874 c.Assert(err, jc.ErrorIsNil) 875 inst := s.checkStartInstance(c, m) 876 877 // make a container on the machine we just created 878 template := state.MachineTemplate{ 879 Base: state.DefaultLTSBase(), 880 Jobs: []state.MachineJob{state.JobHostUnits}, 881 } 882 container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.KVM) 883 c.Assert(err, jc.ErrorIsNil) 884 885 // the PA should not attempt to create it 886 s.checkNoOperations(c) 887 888 // cleanup 889 c.Assert(container.EnsureDead(), gc.IsNil) 890 c.Assert(container.Remove(), gc.IsNil) 891 c.Assert(m.EnsureDead(), gc.IsNil) 892 s.checkStopInstances(c, inst) 893 s.waitForRemovalMark(c, m) 894 } 895 896 type MachineClassifySuite struct { 897 } 898 899 var _ = gc.Suite(&MachineClassifySuite{}) 900 901 type MockMachine struct { 902 life life.Value 903 status status.Status 904 id string 905 idErr error 906 ensureDeadErr error 907 statusErr error 908 } 909 910 func (m *MockMachine) Life() life.Value { 911 return m.life 912 } 913 914 func (m *MockMachine) InstanceId() (instance.Id, error) { 915 return instance.Id(m.id), m.idErr 916 } 917 918 func (m *MockMachine) InstanceNames() (instance.Id, string, error) { 919 instId, err := m.InstanceId() 920 return instId, "", err 921 } 922 923 func (m *MockMachine) EnsureDead() error { 924 return m.ensureDeadErr 925 } 926 927 func (m *MockMachine) Status() (status.Status, string, error) { 928 return m.status, "", m.statusErr 929 } 930 931 func (m *MockMachine) InstanceStatus() (status.Status, string, error) { 932 return m.status, "", m.statusErr 933 } 934 935 func (m *MockMachine) Id() string { 936 return m.id 937 } 938 939 type machineClassificationTest struct { 940 description string 941 life life.Value 942 status status.Status 943 idErr string 944 ensureDeadErr string 945 expectErrCode string 946 expectErrFmt string 947 statusErr string 948 classification provisioner.MachineClassification 949 } 950 951 var machineClassificationTestsNoMaintenance = machineClassificationTest{ 952 description: "Machine doesn't need maintaining", 953 life: life.Alive, 954 status: status.Started, 955 classification: provisioner.None, 956 } 957 958 func (s *MachineClassifySuite) TestMachineClassification(c *gc.C) { 959 test := func(t machineClassificationTest, id string) { 960 // Run a sub-test from the test table 961 s2e := func(s string) error { 962 // Little helper to turn a non-empty string into a useful error for "ErrorMatches" 963 if s != "" { 964 return &params.Error{Code: s} 965 } 966 return nil 967 } 968 969 c.Logf("%s: %s", id, t.description) 970 machine := MockMachine{t.life,
t.status, id, s2e(t.idErr), s2e(t.ensureDeadErr), s2e(t.statusErr)} 971 classification, err := provisioner.ClassifyMachine(loggo.GetLogger("test"), &machine) 972 if err != nil { 973 c.Assert(err, gc.ErrorMatches, fmt.Sprintf(t.expectErrFmt, machine.Id())) 974 } else { 975 c.Assert(err, gc.Equals, s2e(t.expectErrCode)) 976 } 977 c.Assert(classification, gc.Equals, t.classification) 978 } 979 980 test(machineClassificationTestsNoMaintenance, "0") 981 } 982 983 func (s *ProvisionerSuite) TestProvisioningMachinesWithSpacesSuccess(c *gc.C) { 984 p := s.newEnvironProvisioner(c) 985 defer workertest.CleanKill(c, p) 986 987 // Add the spaces used in constraints. 988 space1, err := s.State.AddSpace("space1", "", nil, false) 989 c.Assert(err, jc.ErrorIsNil) 990 space2, err := s.State.AddSpace("space2", "", nil, false) 991 c.Assert(err, jc.ErrorIsNil) 992 993 // Add 1 subnet into space1, and 2 into space2. 994 // Each subnet is in a matching zone (e.g "subnet-#" in "zone#"). 995 testing.AddSubnetsWithTemplate(c, s.State, 3, corenetwork.SubnetInfo{ 996 CIDR: "10.10.{{.}}.0/24", 997 ProviderId: "subnet-{{.}}", 998 AvailabilityZones: []string{"zone{{.}}"}, 999 SpaceID: fmt.Sprintf("{{if (lt . 2)}}%s{{else}}%s{{end}}", space1.Id(), space2.Id()), 1000 VLANTag: 42, 1001 }) 1002 1003 // Add and provision a machine with spaces specified. 1004 cons := constraints.MustParse( 1005 s.defaultConstraints.String(), "spaces=space2,^space1", 1006 ) 1007 // The dummy provider simulates 2 subnets per included space. 1008 expectedSubnetsToZones := map[corenetwork.Id][]string{ 1009 "subnet-0": {"zone0"}, 1010 "subnet-1": {"zone1"}, 1011 } 1012 m, err := s.addMachineWithConstraints(cons) 1013 c.Assert(err, jc.ErrorIsNil) 1014 inst := s.checkStartInstanceCustom( 1015 c, m, "pork", cons, 1016 nil, 1017 expectedSubnetsToZones, 1018 nil, nil, nil, nil, true, 1019 ) 1020 1021 // Cleanup. 1022 c.Assert(m.EnsureDead(), gc.IsNil) 1023 s.checkStopInstances(c, inst) 1024 s.waitForRemovalMark(c, m) 1025 } 1026 1027 func (s *ProvisionerSuite) testProvisioningFailsAndSetsErrorStatusForConstraints( 1028 c *gc.C, 1029 cons constraints.Value, 1030 expectedErrorStatus string, 1031 ) { 1032 machine, err := s.addMachineWithConstraints(cons) 1033 c.Assert(err, jc.ErrorIsNil) 1034 1035 // Start the PA. 1036 p := s.newEnvironProvisioner(c) 1037 defer workertest.CleanKill(c, p) 1038 1039 // Expect StartInstance to fail. 1040 s.checkNoOperations(c) 1041 1042 // Ensure machine error status was set, and the error matches 1043 agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, machine) 1044 c.Check(agentStatus.Status, gc.Equals, status.Error) 1045 c.Check(agentStatus.Message, gc.Equals, expectedErrorStatus) 1046 c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError) 1047 c.Check(instanceStatus.Message, gc.Equals, expectedErrorStatus) 1048 1049 // Make sure the task didn't stop with an error 1050 died := make(chan error) 1051 go func() { 1052 died <- p.Wait() 1053 }() 1054 select { 1055 case <-time.After(coretesting.ShortWait): 1056 case err := <-died: 1057 c.Fatalf("provisioner task died unexpectedly with err: %v", err) 1058 } 1059 1060 // Restart the PA to make sure the machine is not retried. 
1061 workertest.CleanKill(c, p) 1062 p = s.newEnvironProvisioner(c) 1063 defer workertest.CleanKill(c, p) 1064 1065 s.checkNoOperations(c) 1066 } 1067 1068 func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithUnknownSpaces(c *gc.C) { 1069 cons := constraints.MustParse( 1070 s.defaultConstraints.String(), "spaces=missing,missing-too,^ignored-too", 1071 ) 1072 expectedErrorStatus := `matching subnets to zones: space "missing" not found` 1073 s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus) 1074 } 1075 1076 func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithEmptySpaces(c *gc.C) { 1077 _, err := s.State.AddSpace("empty", "", nil, false) 1078 c.Assert(err, jc.ErrorIsNil) 1079 cons := constraints.MustParse( 1080 s.defaultConstraints.String(), "spaces=empty", 1081 ) 1082 expectedErrorStatus := `matching subnets to zones: ` + 1083 `cannot use space "empty" as deployment target: no subnets` 1084 s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus) 1085 } 1086 1087 func (s *CommonProvisionerSuite) addMachineWithRequestedVolumes(volumes []state.HostVolumeParams, cons constraints.Value) (*state.Machine, error) { 1088 return s.BackingState.AddOneMachine(state.MachineTemplate{ 1089 Base: state.DefaultLTSBase(), 1090 Jobs: []state.MachineJob{state.JobHostUnits}, 1091 Constraints: cons, 1092 Volumes: volumes, 1093 }) 1094 } 1095 1096 func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedRootDisk(c *gc.C) { 1097 // Set up a persistent pool. 1098 poolManager := poolmanager.New(state.NewStateSettings(s.State), s.Environ) 1099 _, err := poolManager.Create("persistent-pool", "static", map[string]interface{}{"persistent": true}) 1100 c.Assert(err, jc.ErrorIsNil) 1101 1102 p := s.newEnvironProvisioner(c) 1103 defer workertest.CleanKill(c, p) 1104 1105 cons := constraints.MustParse("root-disk-source=persistent-pool " + s.defaultConstraints.String()) 1106 m, err := s.BackingState.AddOneMachine(state.MachineTemplate{ 1107 Base: state.DefaultLTSBase(), 1108 Jobs: []state.MachineJob{state.JobHostUnits}, 1109 Constraints: cons, 1110 }) 1111 c.Assert(err, jc.ErrorIsNil) 1112 1113 inst := s.checkStartInstanceCustom( 1114 c, m, "pork", cons, 1115 nil, nil, 1116 &storage.VolumeParams{ 1117 Provider: "static", 1118 Attributes: map[string]interface{}{"persistent": true}, 1119 }, 1120 nil, 1121 nil, 1122 nil, true, 1123 ) 1124 1125 // Cleanup. 1126 c.Assert(m.EnsureDead(), gc.IsNil) 1127 s.checkStopInstances(c, inst) 1128 s.waitForRemovalMark(c, m) 1129 } 1130 1131 func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedVolumes(c *gc.C) { 1132 // Set up a persistent pool. 1133 poolManager := poolmanager.New(state.NewStateSettings(s.State), s.Environ) 1134 _, err := poolManager.Create("persistent-pool", "static", map[string]interface{}{"persistent": true}) 1135 c.Assert(err, jc.ErrorIsNil) 1136 1137 p := s.newEnvironProvisioner(c) 1138 defer workertest.CleanKill(c, p) 1139 1140 // Add a machine with volumes to state. 
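// Request one volume from the "static" pool and two from "persistent-pool"; volume 2 is provisioned out of band below.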
1141 requestedVolumes := []state.HostVolumeParams{{ 1142 Volume: state.VolumeParams{Pool: "static", Size: 1024}, 1143 Attachment: state.VolumeAttachmentParams{}, 1144 }, { 1145 Volume: state.VolumeParams{Pool: "persistent-pool", Size: 2048}, 1146 Attachment: state.VolumeAttachmentParams{}, 1147 }, { 1148 Volume: state.VolumeParams{Pool: "persistent-pool", Size: 4096}, 1149 Attachment: state.VolumeAttachmentParams{}, 1150 }} 1151 m, err := s.addMachineWithRequestedVolumes(requestedVolumes, s.defaultConstraints) 1152 c.Assert(err, jc.ErrorIsNil) 1153 1154 // Provision volume-2, so that it is attached rather than created. 1155 sb, err := state.NewStorageBackend(s.State) 1156 c.Assert(err, jc.ErrorIsNil) 1157 err = sb.SetVolumeInfo(names.NewVolumeTag("2"), state.VolumeInfo{ 1158 Pool: "persistent-pool", 1159 VolumeId: "vol-ume", 1160 Size: 4096, 1161 }) 1162 c.Assert(err, jc.ErrorIsNil) 1163 1164 // Provision the machine, checking the volume and volume attachment arguments. 1165 expectedVolumes := []storage.Volume{{ 1166 names.NewVolumeTag("0"), 1167 storage.VolumeInfo{ 1168 Size: 1024, 1169 }, 1170 }, { 1171 names.NewVolumeTag("1"), 1172 storage.VolumeInfo{ 1173 Size: 2048, 1174 Persistent: true, 1175 }, 1176 }} 1177 expectedVolumeAttachments := []storage.VolumeAttachment{{ 1178 Volume: names.NewVolumeTag("2"), 1179 Machine: m.MachineTag(), 1180 VolumeAttachmentInfo: storage.VolumeAttachmentInfo{ 1181 DeviceName: "sdb", 1182 }, 1183 }} 1184 inst := s.checkStartInstanceCustom( 1185 c, m, "pork", s.defaultConstraints, 1186 nil, nil, nil, 1187 expectedVolumes, 1188 expectedVolumeAttachments, 1189 nil, true, 1190 ) 1191 1192 // Cleanup. 1193 c.Assert(m.EnsureDead(), gc.IsNil) 1194 s.checkStopInstances(c, inst) 1195 s.waitForRemovalMark(c, m) 1196 } 1197 1198 func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) { 1199 p := s.newEnvironProvisioner(c) 1200 defer workertest.CleanKill(c, p) 1201 1202 // create a machine 1203 m, err := s.addMachine() 1204 c.Assert(err, jc.ErrorIsNil) 1205 s.checkStartInstance(c, m) 1206 1207 // restart the PA 1208 workertest.CleanKill(c, p) 1209 p = s.newEnvironProvisioner(c) 1210 defer workertest.CleanKill(c, p) 1211 1212 // check that there is only one machine provisioned. 
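// Machine 0 is the bootstrap controller created in SetUpTest, so state should hold exactly two machines: it and the one added above.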
1213 machines, err := s.State.AllMachines() 1214 c.Assert(err, jc.ErrorIsNil) 1215 c.Check(len(machines), gc.Equals, 2) 1216 c.Check(machines[0].Id(), gc.Equals, "0") 1217 c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse) 1218 1219 // the PA should not create it a second time 1220 s.checkNoOperations(c) 1221 } 1222 1223 func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) { 1224 p := s.newEnvironProvisioner(c) 1225 defer workertest.CleanKill(c, p) 1226 1227 // provision a machine 1228 m0, err := s.addMachine() 1229 c.Assert(err, jc.ErrorIsNil) 1230 s.checkStartInstance(c, m0) 1231 1232 // stop the provisioner and make the machine dying 1233 workertest.CleanKill(c, p) 1234 err = m0.Destroy() 1235 c.Assert(err, jc.ErrorIsNil) 1236 1237 // add a new, dying, unprovisioned machine 1238 m1, err := s.addMachine() 1239 c.Assert(err, jc.ErrorIsNil) 1240 err = m1.Destroy() 1241 c.Assert(err, jc.ErrorIsNil) 1242 1243 // start the provisioner and wait for it to reap the useless machine 1244 p = s.newEnvironProvisioner(c) 1245 defer workertest.CleanKill(c, p) 1246 s.checkNoOperations(c) 1247 s.waitForRemovalMark(c, m1) 1248 1249 // verify the other one's still fine 1250 err = m0.Refresh() 1251 c.Assert(err, jc.ErrorIsNil) 1252 c.Assert(m0.Life(), gc.Equals, state.Dying) 1253 } 1254 1255 type mockTaskAPI struct { 1256 provisioner.TaskAPI 1257 } 1258 1259 func (mock *mockTaskAPI) Machines(tags ...names.MachineTag) ([]apiprovisioner.MachineResult, error) { 1260 return nil, fmt.Errorf("error") 1261 } 1262 1263 func (*mockTaskAPI) MachinesWithTransientErrors() ([]apiprovisioner.MachineStatusResult, error) { 1264 return nil, fmt.Errorf("error") 1265 } 1266 1267 type mockDistributionGroupFinder struct { 1268 groups map[names.MachineTag][]string 1269 } 1270 1271 func (mock *mockDistributionGroupFinder) DistributionGroupByMachineId( 1272 tags ...names.MachineTag, 1273 ) ([]apiprovisioner.DistributionGroupResult, error) { 1274 result := make([]apiprovisioner.DistributionGroupResult, len(tags)) 1275 if len(mock.groups) == 0 { 1276 for i := range tags { 1277 result[i] = apiprovisioner.DistributionGroupResult{MachineIds: []string{}} 1278 } 1279 } else { 1280 for i, tag := range tags { 1281 if dg, ok := mock.groups[tag]; ok { 1282 result[i] = apiprovisioner.DistributionGroupResult{MachineIds: dg} 1283 } else { 1284 result[i] = apiprovisioner.DistributionGroupResult{ 1285 MachineIds: []string{}, Err: &params.Error{Code: params.CodeNotFound, Message: "Fail"}} 1286 } 1287 } 1288 } 1289 return result, nil 1290 } 1291 1292 func (s *ProvisionerSuite) TestMachineErrorsRetainInstances(c *gc.C) { 1293 task := s.newProvisionerTask( 1294 c, 1295 config.HarvestAll, 1296 s.Environ, 1297 s.provisioner, 1298 &mockDistributionGroupFinder{}, 1299 mockToolsFinder{}, 1300 ) 1301 defer workertest.CleanKill(c, task) 1302 1303 // create a machine 1304 m0, err := s.addMachine() 1305 c.Assert(err, jc.ErrorIsNil) 1306 s.checkStartInstance(c, m0) 1307 1308 // create an instance out of band 1309 s.startUnknownInstance(c, "999") 1310 1311 // start the provisioner and ensure it doesn't kill any 1312 // instances if there are errors getting machines.
1313 task = s.newProvisionerTask( 1314 c, 1315 config.HarvestAll, 1316 s.Environ, 1317 &mockTaskAPI{}, 1318 &mockDistributionGroupFinder{}, 1319 &mockToolsFinder{}, 1320 ) 1321 defer func() { 1322 err := worker.Stop(task) 1323 c.Assert(err, gc.ErrorMatches, ".*getting machine.*") 1324 }() 1325 s.checkNoOperations(c) 1326 } 1327 1328 func (s *ProvisionerSuite) TestEnvironProvisionerObservesConfigChanges(c *gc.C) { 1329 p := s.newEnvironProvisioner(c) 1330 defer workertest.CleanKill(c, p) 1331 s.assertProvisionerObservesConfigChanges(c, p) 1332 } 1333 1334 func (s *ProvisionerSuite) TestEnvironProvisionerObservesConfigChangesWorkerCount(c *gc.C) { 1335 p := s.newEnvironProvisioner(c) 1336 defer workertest.CleanKill(c, p) 1337 s.assertProvisionerObservesConfigChangesWorkerCount(c, p, false) 1338 } 1339 1340 func (s *ProvisionerSuite) newProvisionerTask( 1341 c *gc.C, 1342 harvestingMethod config.HarvestMode, 1343 broker environs.InstanceBroker, 1344 taskAPI provisioner.TaskAPI, 1345 distributionGroupFinder provisioner.DistributionGroupFinder, 1346 toolsFinder provisioner.ToolsFinder, 1347 ) provisioner.ProvisionerTask { 1348 1349 retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 0) 1350 1351 return s.newProvisionerTaskWithRetryStrategy(c, harvestingMethod, broker, 1352 taskAPI, distributionGroupFinder, toolsFinder, retryStrategy) 1353 } 1354 1355 func (s *ProvisionerSuite) newProvisionerTaskWithRetryStrategy( 1356 c *gc.C, 1357 harvestingMethod config.HarvestMode, 1358 broker environs.InstanceBroker, 1359 taskAPI provisioner.TaskAPI, 1360 distributionGroupFinder provisioner.DistributionGroupFinder, 1361 toolsFinder provisioner.ToolsFinder, 1362 retryStrategy provisioner.RetryStrategy, 1363 ) provisioner.ProvisionerTask { 1364 1365 machineWatcher, err := s.provisioner.WatchModelMachines() 1366 c.Assert(err, jc.ErrorIsNil) 1367 retryWatcher, err := s.provisioner.WatchMachineErrorRetry() 1368 c.Assert(err, jc.ErrorIsNil) 1369 auth, err := authentication.NewAPIAuthenticator(s.provisioner) 1370 c.Assert(err, jc.ErrorIsNil) 1371 1372 w, err := provisioner.NewProvisionerTask(provisioner.TaskConfig{ 1373 ControllerUUID: s.ControllerConfig.ControllerUUID(), 1374 HostTag: names.NewMachineTag("0"), 1375 Logger: loggo.GetLogger("test"), 1376 HarvestMode: harvestingMethod, 1377 TaskAPI: taskAPI, 1378 DistributionGroupFinder: distributionGroupFinder, 1379 ToolsFinder: toolsFinder, 1380 MachineWatcher: machineWatcher, 1381 RetryWatcher: retryWatcher, 1382 Broker: broker, 1383 Auth: auth, 1384 ImageStream: imagemetadata.ReleasedStream, 1385 RetryStartInstanceStrategy: retryStrategy, 1386 CloudCallContextFunc: func(_ stdcontext.Context) context.ProviderCallContext { return s.callCtx }, 1387 NumProvisionWorkers: numProvisionWorkersForTesting, 1388 }) 1389 c.Assert(err, jc.ErrorIsNil) 1390 return w 1391 } 1392 1393 func (s *ProvisionerSuite) TestHarvestNoneReapsNothing(c *gc.C) { 1394 1395 task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}) 1396 defer workertest.CleanKill(c, task) 1397 task.SetHarvestMode(config.HarvestNone) 1398 1399 // Create a machine and an unknown instance. 1400 m0, err := s.addMachine() 1401 c.Assert(err, jc.ErrorIsNil) 1402 s.checkStartInstance(c, m0) 1403 s.startUnknownInstance(c, "999") 1404 1405 // Mark the first machine as dead. 1406 c.Assert(m0.EnsureDead(), gc.IsNil) 1407 1408 // Ensure we're doing nothing. 
1409 s.checkNoOperations(c) 1410 } 1411 1412 func (s *ProvisionerSuite) TestHarvestUnknownReapsOnlyUnknown(c *gc.C) { 1413 task := s.newProvisionerTask(c, 1414 config.HarvestDestroyed, 1415 s.Environ, 1416 s.provisioner, 1417 &mockDistributionGroupFinder{}, 1418 mockToolsFinder{}, 1419 ) 1420 defer workertest.CleanKill(c, task) 1421 task.SetHarvestMode(config.HarvestUnknown) 1422 1423 // Create a machine and an unknown instance. 1424 m0, err := s.addMachine() 1425 c.Assert(err, jc.ErrorIsNil) 1426 i0 := s.checkStartInstance(c, m0) 1427 i1 := s.startUnknownInstance(c, "999") 1428 1429 // Mark the first machine as dead. 1430 c.Assert(m0.EnsureDead(), gc.IsNil) 1431 1432 // When only harvesting unknown machines, only one of the machines 1433 // is stopped. 1434 s.checkStopSomeInstances(c, []instances.Instance{i1}, []instances.Instance{i0}) 1435 s.waitForRemovalMark(c, m0) 1436 } 1437 1438 func (s *ProvisionerSuite) TestHarvestDestroyedReapsOnlyDestroyed(c *gc.C) { 1439 1440 task := s.newProvisionerTask( 1441 c, 1442 config.HarvestDestroyed, 1443 s.Environ, 1444 s.provisioner, 1445 &mockDistributionGroupFinder{}, 1446 mockToolsFinder{}, 1447 ) 1448 defer workertest.CleanKill(c, task) 1449 1450 // Create a machine and an unknown instance. 1451 m0, err := s.addMachine() 1452 c.Assert(err, jc.ErrorIsNil) 1453 i0 := s.checkStartInstance(c, m0) 1454 i1 := s.startUnknownInstance(c, "999") 1455 1456 // Mark the first machine as dead. 1457 c.Assert(m0.EnsureDead(), gc.IsNil) 1458 1459 // When only harvesting destroyed machines, only one of the 1460 // machines is stopped. 1461 s.checkStopSomeInstances(c, []instances.Instance{i0}, []instances.Instance{i1}) 1462 s.waitForRemovalMark(c, m0) 1463 } 1464 1465 func (s *ProvisionerSuite) TestHarvestAllReapsAllTheThings(c *gc.C) { 1466 1467 task := s.newProvisionerTask(c, 1468 config.HarvestDestroyed, 1469 s.Environ, 1470 s.provisioner, 1471 &mockDistributionGroupFinder{}, 1472 mockToolsFinder{}, 1473 ) 1474 defer workertest.CleanKill(c, task) 1475 task.SetHarvestMode(config.HarvestAll) 1476 1477 // Create a machine and an unknown instance. 1478 m0, err := s.addMachine() 1479 c.Assert(err, jc.ErrorIsNil) 1480 i0 := s.checkStartInstance(c, m0) 1481 i1 := s.startUnknownInstance(c, "999") 1482 1483 // Mark the first machine as dead. 1484 c.Assert(m0.EnsureDead(), gc.IsNil) 1485 1486 // Everything must die! 
1487 s.checkStopSomeInstances(c, []instances.Instance{i0, i1}, []instances.Instance{}) 1488 s.waitForRemovalMark(c, m0) 1489 } 1490 1491 func (s *ProvisionerSuite) TestProvisionerObservesMachineJobs(c *gc.C) { 1492 s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond) 1493 broker := &mockBroker{Environ: s.Environ, retryCount: make(map[string]int), 1494 startInstanceFailureInfo: map[string]mockBrokerFailures{ 1495 "3": {whenSucceed: 2, err: fmt.Errorf("error: some error")}, 1496 "4": {whenSucceed: 2, err: fmt.Errorf("error: some error")}, 1497 }, 1498 } 1499 task := s.newProvisionerTask(c, config.HarvestAll, broker, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}) 1500 defer workertest.CleanKill(c, task) 1501 1502 added := s.enableHA(c, 3) 1503 c.Assert(added, gc.HasLen, 2) 1504 s.checkStartInstances(c, added) 1505 } 1506 1507 func assertAvailabilityZoneMachines(c *gc.C, 1508 machines []*state.Machine, 1509 failedAZMachines []*state.Machine, 1510 obtained []provisioner.AvailabilityZoneMachine, 1511 ) { 1512 if len(machines) > 0 { 1513 // Do machine zones match AvailabilityZoneMachine 1514 for _, m := range machines { 1515 zone, err := m.AvailabilityZone() 1516 c.Assert(err, jc.ErrorIsNil) 1517 found := 0 1518 for _, zoneInfo := range obtained { 1519 if zone == zoneInfo.ZoneName { 1520 c.Assert(zoneInfo.MachineIds.Contains(m.Id()), gc.Equals, true, gc.Commentf( 1521 "machine %q not found in list for zone %q; zone list: %#v", m.Id(), zone, zoneInfo, 1522 )) 1523 found += 1 1524 } 1525 } 1526 c.Assert(found, gc.Equals, 1) 1527 } 1528 } 1529 if len(failedAZMachines) > 0 { 1530 for _, m := range failedAZMachines { 1531 // Is the failed machine listed as failed in at least one zone? 1532 failedZones := 0 1533 for _, zoneInfo := range obtained { 1534 if zoneInfo.FailedMachineIds.Contains(m.Id()) { 1535 failedZones += 1 1536 } 1537 } 1538 c.Assert(failedZones, jc.GreaterThan, 0) 1539 } 1540 } 1541 } 1542 1543 // assertAvailabilityZoneMachinesDistribution checks to see if the 1544 // machines have been distributed over the zones (with a maximum delta 1545 // between the max and min number of machines of maxDelta). This check 1546 // method works where there are no machine errors in the test case. 1547 // 1548 // Which machine will be in which zone is dependent on the order in 1549 // which they are provisioned, therefore almost impossible to predict. 1550 func assertAvailabilityZoneMachinesDistribution(c *gc.C, obtained []provisioner.AvailabilityZoneMachine, maxDelta int) { 1551 // Are the machines evenly distributed? No zone should have 1552 // 2 machines more than any other zone. 1553 min, max := 1, 0 1554 counts := make(map[string]int) 1555 for _, zone := range obtained { 1556 count := zone.MachineIds.Size() 1557 counts[zone.ZoneName] = count 1558 if min > count { 1559 min = count 1560 } 1561 if max < count { 1562 max = count 1563 } 1564 } 1565 c.Assert(max-min, jc.LessThan, maxDelta+1, gc.Commentf("min = %d, max = %d, counts = %v", min, max, counts)) 1566 } 1567 1568 // checkAvailabilityZoneMachinesDistributionGroups checks to see if 1569 // the distribution groups have been honored. 1570 func checkAvailabilityZoneMachinesDistributionGroups(c *gc.C, groups map[names.MachineTag][]string, obtained []provisioner.AvailabilityZoneMachine) error { 1571 // The set containing the machines in a distribution group and the 1572 // machine whose distribution group this is, should not be in the 1573 // same AZ, unless there are more machines in the set, than AZs. 
1574 // If there are more machines in the set than AZs, each AZ should have 1575 // the number of machines in the set divided by the number of AZ in it, 1576 // or 1 less than that number. 1577 // 1578 // e.g. if there are 5 machines in the set and 3 AZ, each AZ should have 1579 // 2 or 1 machines from the set in it. 1580 obtainedZoneCount := len(obtained) 1581 for tag, group := range groups { 1582 maxMachineInZoneCount := 1 1583 applicationMachinesCount := len(group) + 1 1584 if applicationMachinesCount > obtainedZoneCount { 1585 maxMachineInZoneCount = applicationMachinesCount / obtainedZoneCount 1586 } 1587 for _, z := range obtained { 1588 if z.MachineIds.Contains(tag.Id()) { 1589 intersection := z.MachineIds.Intersection(set.NewStrings(group...)) 1590 machineCount := intersection.Size() + 1 1591 // For appropriate machine distribution, the number of machines in the 1592 // zone should be the same as maxMachineInZoneCount or 1 less. 1593 if machineCount == maxMachineInZoneCount || machineCount == maxMachineInZoneCount-1 { 1594 break 1595 } 1596 return errors.Errorf("%+v has too many of %s and %s", z.MachineIds, tag.Id(), group) 1597 } 1598 } 1599 } 1600 return nil 1601 } 1602 1603 func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachines(c *gc.C) { 1604 // Per provider dummy, there will be 3 available availability zones. 1605 task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}) 1606 defer workertest.CleanKill(c, task) 1607 1608 machines, err := s.addMachines(4) 1609 c.Assert(err, jc.ErrorIsNil) 1610 s.checkStartInstances(c, machines) 1611 1612 availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task) 1613 assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines) 1614 assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1) 1615 } 1616 1617 func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailures(c *gc.C) { 1618 // Per provider dummy, there will be 3 available availability zones. 1619 s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond) 1620 e := &mockBroker{ 1621 Environ: s.Environ, 1622 retryCount: make(map[string]int), 1623 startInstanceFailureInfo: map[string]mockBrokerFailures{ 1624 "2": {whenSucceed: 1, err: errors.New("zing")}, 1625 }, 1626 } 1627 retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2) 1628 task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed, 1629 e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy) 1630 defer workertest.CleanKill(c, task) 1631 1632 machines, err := s.addMachines(4) 1633 c.Assert(err, jc.ErrorIsNil) 1634 s.checkStartInstances(c, machines) 1635 1636 availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task) 1637 assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines) 1638 1639 // The reason maxDelta is 2 here is because in certain failure cases this 1640 // may start two machines on each of two zones, and none on the other (if 1641 // the failing machine is started second or third, and the subsequent 1642 // machines are started before markMachineFailedInAZ() is called). See 1643 // https://github.com/juju/juju/pull/12267 for more detail. 
	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 2)
}

func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesWithDG(c *gc.C) {
	// Per the dummy provider, there are 3 availability zones.
	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
		names.NewMachineTag("1"): {"3", "4"},
		names.NewMachineTag("2"): {},
		names.NewMachineTag("3"): {"1", "4"},
		names.NewMachineTag("4"): {"1", "3"},
		names.NewMachineTag("5"): {},
	}}

	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(5)
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstances(c, machines)

	// Machines 1, 3 and 4 should be in different zones.
	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
}

func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailuresWithDG(c *gc.C) {
	// Per the dummy provider, there are 3 availability zones.
	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
	e := &mockBroker{
		Environ:    s.Environ,
		retryCount: make(map[string]int),
		startInstanceFailureInfo: map[string]mockBrokerFailures{
			"2": {whenSucceed: 1, err: errors.New("zing")},
		},
	}
	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
		names.NewMachineTag("1"): {"4", "5"},
		names.NewMachineTag("2"): {"3"},
		names.NewMachineTag("3"): {"2"},
		names.NewMachineTag("4"): {"1", "5"},
		names.NewMachineTag("5"): {"1", "4"},
	}}
	retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 2)
	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
		e, s.provisioner, dgFinder, mockToolsFinder{}, retryStrategy)
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(5)
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstances(c, machines)

	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines, []*state.Machine{machines[1]}, availabilityZoneMachines)
	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
}

func (s *ProvisionerSuite) TestProvisioningMachinesSingleMachineDGFailure(c *gc.C) {
	// If a single machine fails getting the distribution group,
	// ensure the other machines are still provisioned.
	dgFinder := &mockDistributionGroupFinder{
		groups: map[names.MachineTag][]string{
			names.NewMachineTag("2"): {"3", "5"},
			names.NewMachineTag("3"): {"2", "5"},
			names.NewMachineTag("4"): {"1"},
			names.NewMachineTag("5"): {"2", "3"},
		},
	}
	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(5)
	c.Assert(err, jc.ErrorIsNil)

	s.checkStartInstances(c, machines[1:])
	_, err = machines[0].InstanceId()
	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)

	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines[1:], nil, availabilityZoneMachines)
	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
}

func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStopMachines(c *gc.C) {
	// Per the dummy provider, there are 3 availability zones.
	task := s.newProvisionerTask(
		c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(4)
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstances(c, machines)

	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1)

	c.Assert(machines[0].EnsureDead(), gc.IsNil)
	s.waitForRemovalMark(c, machines[0])

	assertAvailabilityZoneMachines(c, machines[1:], nil, provisioner.GetCopyAvailabilityZoneMachines(task))
}

func (s *ProvisionerSuite) TestProvisioningMachinesFailMachine(c *gc.C) {
	e := &mockBroker{
		Environ:    s.Environ,
		retryCount: make(map[string]int),
		startInstanceFailureInfo: map[string]mockBrokerFailures{
			"2": {whenSucceed: 2, err: errors.New("fail provisioning for TestProvisioningMachinesFailMachine")},
		},
	}
	task := s.newProvisionerTask(c, config.HarvestDestroyed,
		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(4)
	c.Assert(err, jc.ErrorIsNil)
	mFail := machines[1]
	machines = append(machines[:1], machines[2:]...)
	s.checkStartInstances(c, machines)
	_, err = mFail.InstanceId()
	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)

	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1)
}

func (s *ProvisionerSuite) TestAvailabilityZoneMachinesRestartTask(c *gc.C) {
	// Per the dummy provider, there are 3 availability zones.
	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(4)
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstances(c, machines)

	availabilityZoneMachinesBefore := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachinesBefore)
	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachinesBefore, 1)

	workertest.CleanKill(c, task)
	newTask := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
	defer workertest.CleanKill(c, newTask)

	// Verify provisionerTask.availabilityZoneMachines is the same before and
	// after the provisionerTask is restarted.
	availabilityZoneMachinesAfter := provisioner.GetCopyAvailabilityZoneMachines(newTask)
	c.Assert(availabilityZoneMachinesBefore, jc.DeepEquals, availabilityZoneMachinesAfter)
}

func (s *ProvisionerSuite) TestProvisioningMachinesClearAZFailures(c *gc.C) {
	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
	e := &mockBroker{
		Environ:    s.Environ,
		retryCount: make(map[string]int),
		startInstanceFailureInfo: map[string]mockBrokerFailures{
			"1": {whenSucceed: 3, err: errors.New("zing")},
		},
	}
	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 4)
	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
	defer workertest.CleanKill(c, task)

	machine, err := s.addMachine()
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstance(c, machine)
	count := e.getRetryCount(machine.Id())
	c.Assert(count, gc.Equals, 3)
	machineAZ, err := machine.AvailabilityZone()
	c.Assert(err, jc.ErrorIsNil)
	// Zones 3 and 4 have the same machine count; one is picked at random.
	c.Assert(set.NewStrings("zone3", "zone4").Contains(machineAZ), jc.IsTrue)
}

func (s *ProvisionerSuite) TestProvisioningMachinesDerivedAZ(c *gc.C) {
	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
	e := &mockBroker{
		Environ:    s.Environ,
		retryCount: make(map[string]int),
		startInstanceFailureInfo: map[string]mockBrokerFailures{
			"2": {whenSucceed: 3, err: errors.New("zing")},
			"3": {whenSucceed: 1, err: errors.New("zing")},
			"5": {whenSucceed: 1, err: environs.ZoneIndependentError(errors.New("arf"))},
		},
		derivedAZ: map[string][]string{
			"1": {"fail-zone"},
			"2": {"zone4"},
			"3": {"zone1", "zone4"},
			"4": {"zone1"},
			"5": {"zone3"},
		},
	}
	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2)
	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(5)
	c.Assert(err, jc.ErrorIsNil)
	mFail := machines[:2]
	mSucceed := machines[2:]

	s.checkStartInstances(c, mSucceed)
	c.Assert(e.getRetryCount(mSucceed[0].Id()), gc.Equals, 1)
	c.Assert(e.getRetryCount(mSucceed[2].Id()), gc.Equals, 1)

	// This synchronisation addresses a potential race condition.
	// It can happen that, upon successful return from checkStartInstances,
	// the machine(s) arranged for provisioning failure have not yet been
	// retried the specified number of times, so we wait.
	id := mFail[1].Id()
	timeout := time.After(coretesting.LongWait)
	for e.getRetryCount(id) < 3 {
		select {
		case <-timeout:
			c.Fatalf("Failed provision of %q did not retry 3 times", id)
		case <-time.After(coretesting.ShortWait):
			// Poll again rather than spinning in a tight loop.
		}
	}

	_, err = mFail[0].InstanceId()
	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
	_, err = mFail[1].InstanceId()
	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)

	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
	assertAvailabilityZoneMachines(c, mSucceed, nil, availabilityZoneMachines)

	for i, zone := range []string{"zone1", "zone3"} {
		machineAZ, err := mSucceed[i+1].AvailabilityZone()
		c.Assert(err, jc.ErrorIsNil)
		c.Assert(machineAZ, gc.Equals, zone)
	}
}

func (s *ProvisionerSuite) TestProvisioningMachinesNoZonedEnviron(c *gc.C) {
	// Make sure the provisioner still works for providers that do not
	// implement the ZonedEnviron interface.
	noZonedEnvironBroker := &mockNoZonedEnvironBroker{Environ: s.Environ}
	task := s.newProvisionerTask(c,
		config.HarvestDestroyed,
		noZonedEnvironBroker,
		s.provisioner,
		&mockDistributionGroupFinder{},
		mockToolsFinder{})
	defer workertest.CleanKill(c, task)

	machines, err := s.addMachines(4)
	c.Assert(err, jc.ErrorIsNil)
	s.checkStartInstances(c, machines)

	expected := provisioner.GetCopyAvailabilityZoneMachines(task)
	c.Assert(expected, gc.HasLen, 0)
}

type mockNoZonedEnvironBroker struct {
	environs.Environ
}

func (b *mockNoZonedEnvironBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
	return b.Environ.StartInstance(ctx, args)
}

type mockBroker struct {
	environs.Environ

	mu                       sync.Mutex
	retryCount               map[string]int
	startInstanceFailureInfo map[string]mockBrokerFailures
	derivedAZ                map[string][]string
}

type mockBrokerFailures struct {
	err         error
	whenSucceed int
}

func (b *mockBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
	// All machines are provisioned successfully the first time unless
	// mock.startInstanceFailureInfo is configured.
	id := args.InstanceConfig.MachineId
	b.mu.Lock()
	defer b.mu.Unlock()
	retries := b.retryCount[id]
	whenSucceed := 0
	var returnError error
	if failureInfo, ok := b.startInstanceFailureInfo[id]; ok {
		whenSucceed = failureInfo.whenSucceed
		returnError = failureInfo.err
	}
	if retries == whenSucceed {
		return b.Environ.StartInstance(ctx, args)
	}
	b.retryCount[id] = retries + 1
	return nil, returnError
}

func (b *mockBroker) getRetryCount(id string) int {
	b.mu.Lock()
	retries := b.retryCount[id]
	b.mu.Unlock()
	return retries
}

// ZonedEnviron methods, necessary for provisionerTask.populateAvailabilityZoneMachines
// where the mockBroker is used.

func (b *mockBroker) AvailabilityZones(ctx context.ProviderCallContext) (corenetwork.AvailabilityZones, error) {
	return b.Environ.(providercommon.ZonedEnviron).AvailabilityZones(ctx)
}

func (b *mockBroker) InstanceAvailabilityZoneNames(ctx context.ProviderCallContext, ids []instance.Id) (map[instance.Id]string, error) {
	return b.Environ.(providercommon.ZonedEnviron).InstanceAvailabilityZoneNames(ctx, ids)
}

func (b *mockBroker) DeriveAvailabilityZones(ctx context.ProviderCallContext, args environs.StartInstanceParams) ([]string, error) {
	id := args.InstanceConfig.MachineId
	b.mu.Lock()
	defer b.mu.Unlock()
	if derivedAZ, ok := b.derivedAZ[id]; ok {
		return derivedAZ, nil
	}
	return b.Environ.(providercommon.ZonedEnviron).DeriveAvailabilityZones(ctx, args)
}

type mockToolsFinder struct {
}

func (f mockToolsFinder) FindTools(number version.Number, os string, a string) (coretools.List, error) {
	if a == "" {
		return nil, errors.New("missing arch")
	}
	v, err := version.ParseBinary(fmt.Sprintf("%s-%s-%s", number, os, arch.HostArch()))
	if err != nil {
		return nil, err
	}
	v.Arch = a
	return coretools.List{&coretools.Tools{Version: v}}, nil
}
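
// newFailOnceBroker is an illustrative sketch, not used by any test above: it
// shows how mockBroker's failure injection is typically wired up. The helper
// name and the choice of machine "2" are hypothetical; whenSucceed: 1 means
// StartInstance for machine "2" fails once with failErr and then succeeds.
func newFailOnceBroker(env environs.Environ, failErr error) *mockBroker {
	return &mockBroker{
		Environ:    env,
		retryCount: make(map[string]int),
		startInstanceFailureInfo: map[string]mockBrokerFailures{
			"2": {whenSucceed: 1, err: failErr},
		},
	}
}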
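
// exampleFindTools is an illustrative sketch, not used by any test above, of
// how mockToolsFinder responds: the requested architecture overrides the host
// architecture in the reported binary version. The function name and the
// version/OS/arch values are hypothetical.
func exampleFindTools() (coretools.List, error) {
	// Returns a single tools entry whose version is "2.9.0-ubuntu-arm64",
	// regardless of the architecture of the machine running the test.
	return mockToolsFinder{}.FindTools(version.MustParse("2.9.0"), "ubuntu", "arm64")
}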