github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/provisioner/provisioner_task.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package provisioner

import (
	"fmt"
	"time"

	"github.com/juju/errors"
	"github.com/juju/utils"
	"gopkg.in/juju/names.v2"

	apiprovisioner "github.com/juju/juju/api/provisioner"
	"github.com/juju/juju/apiserver/common/networkingcommon"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/cloudconfig/instancecfg"
	"github.com/juju/juju/constraints"
	"github.com/juju/juju/controller"
	"github.com/juju/juju/controller/authentication"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/environs/imagemetadata"
	"github.com/juju/juju/environs/simplestreams"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/network"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/multiwatcher"
	"github.com/juju/juju/status"
	"github.com/juju/juju/storage"
	coretools "github.com/juju/juju/tools"
	jujuversion "github.com/juju/juju/version"
	"github.com/juju/juju/watcher"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/catacomb"
	"github.com/juju/juju/wrench"
	"github.com/juju/version"
)

type ProvisionerTask interface {
	worker.Worker

	// SetHarvestMode sets a flag to indicate how the provisioner task
	// should harvest machines. See config.HarvestMode for
	// documentation of behavior.
	SetHarvestMode(mode config.HarvestMode)
}

type MachineGetter interface {
	Machine(names.MachineTag) (*apiprovisioner.Machine, error)
	MachinesWithTransientErrors() ([]*apiprovisioner.Machine, []params.StatusResult, error)
}

// ToolsFinder is an interface used for finding tools to run on
// provisioned instances.
type ToolsFinder interface {
	// FindTools returns a list of tools matching the specified
	// version, series, and architecture. If arch is empty, the
	// implementation is expected to use a well-documented default.
	FindTools(version version.Number, series string, arch string) (coretools.List, error)
}

func NewProvisionerTask(
	controllerUUID string,
	machineTag names.MachineTag,
	harvestMode config.HarvestMode,
	machineGetter MachineGetter,
	toolsFinder ToolsFinder,
	machineWatcher watcher.StringsWatcher,
	retryWatcher watcher.NotifyWatcher,
	broker environs.InstanceBroker,
	auth authentication.AuthenticationProvider,
	imageStream string,
	retryStartInstanceStrategy RetryStrategy,
) (ProvisionerTask, error) {
	machineChanges := machineWatcher.Changes()
	workers := []worker.Worker{machineWatcher}
	var retryChanges watcher.NotifyChannel
	if retryWatcher != nil {
		retryChanges = retryWatcher.Changes()
		workers = append(workers, retryWatcher)
	}
	task := &provisionerTask{
		controllerUUID:             controllerUUID,
		machineTag:                 machineTag,
		machineGetter:              machineGetter,
		toolsFinder:                toolsFinder,
		machineChanges:             machineChanges,
		retryChanges:               retryChanges,
		broker:                     broker,
		auth:                       auth,
		harvestMode:                harvestMode,
		harvestModeChan:            make(chan config.HarvestMode, 1),
		machines:                   make(map[string]*apiprovisioner.Machine),
		imageStream:                imageStream,
		retryStartInstanceStrategy: retryStartInstanceStrategy,
	}
	err := catacomb.Invoke(catacomb.Plan{
		Site: &task.catacomb,
		Work: task.loop,
		Init: workers,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return task, nil
}

type provisionerTask struct {
	controllerUUID             string
	machineTag                 names.MachineTag
	machineGetter              MachineGetter
	toolsFinder                ToolsFinder
	machineChanges             watcher.StringsChannel
	retryChanges               watcher.NotifyChannel
	broker                     environs.InstanceBroker
	catacomb                   catacomb.Catacomb
	auth                       authentication.AuthenticationProvider
	imageStream                string
	harvestMode                config.HarvestMode
	harvestModeChan            chan config.HarvestMode
	retryStartInstanceStrategy RetryStrategy
	// instance id -> instance
	instances map[instance.Id]instance.Instance
	// machine id -> machine
	machines map[string]*apiprovisioner.Machine
}

// Kill implements worker.Worker.Kill.
func (task *provisionerTask) Kill() {
	task.catacomb.Kill(nil)
}

// Wait implements worker.Worker.Wait.
func (task *provisionerTask) Wait() error {
	return task.catacomb.Wait()
}

func (task *provisionerTask) loop() error {

	// Don't allow the harvesting mode to change until we have read at
	// least one set of changes, which will populate the task.machines
	// map. Otherwise we will potentially see all legitimate instances
	// as unknown.
	var harvestModeChan chan config.HarvestMode

	// When the watcher is started, its initial event contains all the
	// machines that are relevant, so we know changes will arrive
	// straight away.
	for {
		select {
		case <-task.catacomb.Dying():
			logger.Infof("Shutting down provisioner task %s", task.machineTag)
			return task.catacomb.ErrDying()
		case ids, ok := <-task.machineChanges:
			if !ok {
				return errors.New("machine watcher closed channel")
			}
			if err := task.processMachines(ids); err != nil {
				return errors.Annotate(err, "failed to process updated machines")
			}

			// We've seen a set of changes. Enable modification of
			// harvesting mode.
			harvestModeChan = task.harvestModeChan
		case harvestMode := <-harvestModeChan:
			if harvestMode == task.harvestMode {
				break
			}
			logger.Infof("harvesting mode changed to %s", harvestMode)
			task.harvestMode = harvestMode
			if harvestMode.HarvestUnknown() {
				logger.Infof("harvesting unknown machines")
				if err := task.processMachines(nil); err != nil {
					return errors.Annotate(err, "failed to process machines after safe mode disabled")
				}
			}
		case <-task.retryChanges:
			if err := task.processMachinesWithTransientErrors(); err != nil {
				return errors.Annotate(err, "failed to process machines with transient errors")
			}
		}
	}
}

// SetHarvestMode implements ProvisionerTask.SetHarvestMode().
func (task *provisionerTask) SetHarvestMode(mode config.HarvestMode) {
	select {
	case task.harvestModeChan <- mode:
	case <-task.catacomb.Dying():
	}
}

func (task *provisionerTask) processMachinesWithTransientErrors() error {
	machines, statusResults, err := task.machineGetter.MachinesWithTransientErrors()
	if err != nil {
		return nil
	}
	logger.Tracef("processMachinesWithTransientErrors(%v)", statusResults)
	var pending []*apiprovisioner.Machine
	for i, statusResult := range statusResults {
		if statusResult.Error != nil {
			logger.Errorf("cannot retry provisioning of machine %q: %v", statusResult.Id, statusResult.Error)
			continue
		}
		machine := machines[i]
		if err := machine.SetStatus(status.Pending, "", nil); err != nil {
			logger.Errorf("cannot reset status of machine %q: %v", statusResult.Id, err)
			continue
		}
		task.machines[machine.Tag().String()] = machine
		pending = append(pending, machine)
	}
	return task.startMachines(pending)
}

func (task *provisionerTask) processMachines(ids []string) error {
	logger.Tracef("processMachines(%v)", ids)

	// Populate the task's maps of current instances and machines.
	if err := task.populateMachineMaps(ids); err != nil {
		return err
	}

	// Find machines without an instance id or that are dead.
	pending, dead, maintain, err := task.pendingOrDeadOrMaintain(ids)
	if err != nil {
		return err
	}

	// Stop all machines that are dead.
	stopping := task.instancesForMachines(dead)

	// Find running instances that have no machines associated.
	unknown, err := task.findUnknownInstances(stopping)
	if err != nil {
		return err
	}
	if !task.harvestMode.HarvestUnknown() {
		logger.Infof(
			"%s is set to %s; unknown instances not stopped %v",
			config.ProvisionerHarvestModeKey,
			task.harvestMode.String(),
			instanceIds(unknown),
		)
		unknown = nil
	}
	if task.harvestMode.HarvestNone() || !task.harvestMode.HarvestDestroyed() {
		logger.Infof(
			`%s is set to "%s"; will not harvest %s`,
			config.ProvisionerHarvestModeKey,
			task.harvestMode.String(),
			instanceIds(stopping),
		)
		stopping = nil
	}

	if len(stopping) > 0 {
		logger.Infof("stopping known instances %v", stopping)
	}
	if len(unknown) > 0 {
		logger.Infof("stopping unknown instances %v", instanceIds(unknown))
	}
	// It's important that we stop unknown instances before starting
	// pending ones, because if we start an instance and then fail to
	// set its InstanceId on the machine we don't want to start a new
	// instance for the same machine ID.
	if err := task.stopInstances(append(stopping, unknown...)); err != nil {
		return err
	}

	// Remove any dead machines from state.
	for _, machine := range dead {
		logger.Infof("removing dead machine %q", machine)
		if err := machine.MarkForRemoval(); err != nil {
			logger.Errorf("failed to remove dead machine %q", machine)
		}
		delete(task.machines, machine.Id())
	}

	// Any machines that require maintenance get pinged.
	task.maintainMachines(maintain)

	// Start an instance for the pending ones.
	return task.startMachines(pending)
}

func instanceIds(instances []instance.Instance) []string {
	ids := make([]string, 0, len(instances))
	for _, inst := range instances {
		ids = append(ids, string(inst.Id()))
	}
	return ids
}

// populateMachineMaps updates task.instances. It also updates the
// task.machines map if a list of IDs is given.
func (task *provisionerTask) populateMachineMaps(ids []string) error {
	task.instances = make(map[instance.Id]instance.Instance)

	instances, err := task.broker.AllInstances()
	if err != nil {
		return errors.Annotate(err, "failed to get all instances from broker")
	}
	for _, i := range instances {
		task.instances[i.Id()] = i
	}

	// Update the machines map with new data for each of the machines in the
	// change list.
	// TODO(thumper): update for API server later to get all machines in one go.
	for _, id := range ids {
		machineTag := names.NewMachineTag(id)
		machine, err := task.machineGetter.Machine(machineTag)
		switch {
		case params.IsCodeNotFoundOrCodeUnauthorized(err):
			logger.Debugf("machine %q not found in state", id)
			delete(task.machines, id)
		case err == nil:
			task.machines[id] = machine
		default:
			return errors.Annotatef(err, "failed to get machine %v", id)
		}
	}
	return nil
}

// pendingOrDeadOrMaintain looks up machines with the given ids and returns
// those that do not have an instance id assigned yet, those that are dead,
// and those that require maintenance.
func (task *provisionerTask) pendingOrDeadOrMaintain(ids []string) (pending, dead, maintain []*apiprovisioner.Machine, err error) {
	for _, id := range ids {
		machine, found := task.machines[id]
		if !found {
			logger.Infof("machine %q not found", id)
			continue
		}
		var classification MachineClassification
		classification, err = classifyMachine(machine)
		if err != nil {
			return // return the error
		}
		switch classification {
		case Pending:
			pending = append(pending, machine)
		case Dead:
			dead = append(dead, machine)
		case Maintain:
			maintain = append(maintain, machine)
		}
	}
	logger.Tracef("pending machines: %v", pending)
	logger.Tracef("dead machines: %v", dead)
	return
}

type ClassifiableMachine interface {
	Life() params.Life
	InstanceId() (instance.Id, error)
	EnsureDead() error
	Status() (status.Status, string, error)
	Id() string
}

type MachineClassification string

const (
	None     MachineClassification = "none"
	Pending  MachineClassification = "Pending"
	Dead     MachineClassification = "Dead"
	Maintain MachineClassification = "Maintain"
)

func classifyMachine(machine ClassifiableMachine) (
	MachineClassification, error) {
	switch machine.Life() {
	case params.Dying:
		if _, err := machine.InstanceId(); err == nil {
			return None, nil
		} else if !params.IsCodeNotProvisioned(err) {
			return None, errors.Annotatef(err, "failed to load dying machine id:%s, details:%v", machine.Id(), machine)
		}
		logger.Infof("killing dying, unprovisioned machine %q", machine)
		if err := machine.EnsureDead(); err != nil {
			return None, errors.Annotatef(err, "failed to ensure machine dead id:%s, details:%v", machine.Id(), machine)
		}
		fallthrough
	case params.Dead:
		return Dead, nil
	}
	instId, err := machine.InstanceId()
	if err != nil {
		if !params.IsCodeNotProvisioned(err) {
			return None, errors.Annotatef(err, "failed to load machine id:%s, details:%v", machine.Id(), machine)
		}
		machineStatus, _, err := machine.Status()
		if err != nil {
			logger.Infof("cannot get machine id:%s, details:%v, err:%v", machine.Id(), machine, err)
			return None, nil
		}
		if machineStatus == status.Pending {
			logger.Infof("found machine pending provisioning id:%s, details:%v", machine.Id(), machine)
			return Pending, nil
		}
		return None, nil
	}
	logger.Infof("machine %s already started as instance %q", machine.Id(), instId)

	if state.ContainerTypeFromId(machine.Id()) != "" {
		return Maintain, nil
	}
	return None, nil
}

// findUnknownInstances finds instances which are not associated with a machine.
func (task *provisionerTask) findUnknownInstances(stopping []instance.Instance) ([]instance.Instance, error) {
	// Make a copy of the instances we know about.
	instances := make(map[instance.Id]instance.Instance)
	for k, v := range task.instances {
		instances[k] = v
	}

	for _, m := range task.machines {
		instId, err := m.InstanceId()
		switch {
		case err == nil:
			delete(instances, instId)
		case params.IsCodeNotProvisioned(err):
		case params.IsCodeNotFoundOrCodeUnauthorized(err):
		default:
			return nil, err
		}
	}
	// Now remove all those instances that we are stopping already as we
	// know about those and don't want to include them in the unknown list.
	for _, inst := range stopping {
		delete(instances, inst.Id())
	}
	var unknown []instance.Instance
	for _, inst := range instances {
		unknown = append(unknown, inst)
	}
	return unknown, nil
}

// instancesForMachines returns a list of instance.Instance that represent
// the list of machines running in the provider. Missing machines are
// omitted from the list.
func (task *provisionerTask) instancesForMachines(machines []*apiprovisioner.Machine) []instance.Instance {
	var instances []instance.Instance
	for _, machine := range machines {
		instId, err := machine.InstanceId()
		if err == nil {
			instance, found := task.instances[instId]
			// If the instance is not found we can't stop it.
			if found {
				instances = append(instances, instance)
			}
		}
	}
	return instances
}

func (task *provisionerTask) stopInstances(instances []instance.Instance) error {
	// Although calling StopInstances with an empty slice should produce no change
	// in the provider, environs like dummy do not consider this a no-op.
	if len(instances) == 0 {
		return nil
	}
	if wrench.IsActive("provisioner", "stop-instances") {
		return errors.New("wrench in the works")
	}

	ids := make([]instance.Id, len(instances))
	for i, inst := range instances {
		ids[i] = inst.Id()
	}
	if err := task.broker.StopInstances(ids...); err != nil {
		return errors.Annotate(err, "broker failed to stop instances")
	}
	return nil
}

func (task *provisionerTask) constructInstanceConfig(
	machine *apiprovisioner.Machine,
	auth authentication.AuthenticationProvider,
	pInfo *params.ProvisioningInfo,
) (*instancecfg.InstanceConfig, error) {

	stateInfo, apiInfo, err := auth.SetupAuthentication(machine)
	if err != nil {
		return nil, errors.Annotate(err, "failed to setup authentication")
	}

	// Generate a nonce for the new instance, with the format: "machine-#:UUID".
	// The first part is a badge, specifying the tag of the machine the provisioner
	// is running on, while the second part is a random UUID.
	uuid, err := utils.NewUUID()
	if err != nil {
		return nil, errors.Annotate(err, "failed to generate a nonce for machine "+machine.Id())
	}

	nonce := fmt.Sprintf("%s:%s", task.machineTag, uuid)
	instanceConfig, err := instancecfg.NewInstanceConfig(
		names.NewControllerTag(controller.Config(pInfo.ControllerConfig).ControllerUUID()),
		machine.Id(),
		nonce,
		task.imageStream,
		pInfo.Series,
		apiInfo,
	)
	if err != nil {
		return nil, errors.Trace(err)
	}

	instanceConfig.Tags = pInfo.Tags
	if len(pInfo.Jobs) > 0 {
		instanceConfig.Jobs = pInfo.Jobs
	}

	if multiwatcher.AnyJobNeedsState(instanceConfig.Jobs...) {
		publicKey, err := simplestreams.UserPublicSigningKey()
		if err != nil {
			return nil, err
		}
		instanceConfig.Controller = &instancecfg.ControllerConfig{
			PublicImageSigningKey: publicKey,
			MongoInfo:             stateInfo,
		}
		instanceConfig.Controller.Config = make(map[string]interface{})
		for k, v := range pInfo.ControllerConfig {
			instanceConfig.Controller.Config[k] = v
		}
	}

	return instanceConfig, nil
}

func constructStartInstanceParams(
	controllerUUID string,
	machine *apiprovisioner.Machine,
	instanceConfig *instancecfg.InstanceConfig,
	provisioningInfo *params.ProvisioningInfo,
	possibleTools coretools.List,
) (environs.StartInstanceParams, error) {

	volumes := make([]storage.VolumeParams, len(provisioningInfo.Volumes))
	for i, v := range provisioningInfo.Volumes {
		volumeTag, err := names.ParseVolumeTag(v.VolumeTag)
		if err != nil {
			return environs.StartInstanceParams{}, errors.Trace(err)
		}
		if v.Attachment == nil {
			return environs.StartInstanceParams{}, errors.Errorf("volume params missing attachment")
		}
		machineTag, err := names.ParseMachineTag(v.Attachment.MachineTag)
		if err != nil {
			return environs.StartInstanceParams{}, errors.Trace(err)
		}
		if machineTag != machine.Tag() {
			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params has invalid machine tag")
		}
		if v.Attachment.InstanceId != "" {
			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params specifies instance ID")
		}
		volumes[i] = storage.VolumeParams{
			volumeTag,
			v.Size,
			storage.ProviderType(v.Provider),
			v.Attributes,
			v.Tags,
			&storage.VolumeAttachmentParams{
				AttachmentParams: storage.AttachmentParams{
					Machine:  machineTag,
					ReadOnly: v.Attachment.ReadOnly,
				},
				Volume: volumeTag,
			},
		}
	}

	var subnetsToZones map[network.Id][]string
	if provisioningInfo.SubnetsToZones != nil {
		// Convert subnet provider ids from string to network.Id.
		subnetsToZones = make(map[network.Id][]string, len(provisioningInfo.SubnetsToZones))
		for providerId, zones := range provisioningInfo.SubnetsToZones {
			subnetsToZones[network.Id(providerId)] = zones
		}
	}

	var endpointBindings map[string]network.Id
	if len(provisioningInfo.EndpointBindings) != 0 {
		endpointBindings = make(map[string]network.Id)
		for endpoint, space := range provisioningInfo.EndpointBindings {
			endpointBindings[endpoint] = network.Id(space)
		}
	}
	possibleImageMetadata := make([]*imagemetadata.ImageMetadata, len(provisioningInfo.ImageMetadata))
	for i, metadata := range provisioningInfo.ImageMetadata {
		possibleImageMetadata[i] = &imagemetadata.ImageMetadata{
			Id:          metadata.ImageId,
			Arch:        metadata.Arch,
			RegionAlias: metadata.Region,
			RegionName:  metadata.Region,
			Storage:     metadata.RootStorageType,
			Stream:      metadata.Stream,
			VirtType:    metadata.VirtType,
			Version:     metadata.Version,
		}
	}

	return environs.StartInstanceParams{
		ControllerUUID:    controllerUUID,
		Constraints:       provisioningInfo.Constraints,
		Tools:             possibleTools,
		InstanceConfig:    instanceConfig,
		Placement:         provisioningInfo.Placement,
		DistributionGroup: machine.DistributionGroup,
		Volumes:           volumes,
		SubnetsToZones:    subnetsToZones,
		EndpointBindings:  endpointBindings,
		ImageMetadata:     possibleImageMetadata,
		StatusCallback:    machine.SetInstanceStatus,
	}, nil
}

func (task *provisionerTask) maintainMachines(machines []*apiprovisioner.Machine) error {
	for _, m := range machines {
		logger.Infof("maintainMachines: %v", m)
		startInstanceParams := environs.StartInstanceParams{}
		startInstanceParams.InstanceConfig = &instancecfg.InstanceConfig{}
		startInstanceParams.InstanceConfig.MachineId = m.Id()
		if err := task.broker.MaintainInstance(startInstanceParams); err != nil {
			return errors.Annotatef(err, "cannot maintain machine %v", m)
		}
	}
	return nil
}

func (task *provisionerTask) startMachines(machines []*apiprovisioner.Machine) error {
	for _, m := range machines {

		pInfo, err := m.ProvisioningInfo()
		if err != nil {
			return task.setErrorStatus("fetching provisioning info for machine %q: %v", m, err)
		}

		instanceCfg, err := task.constructInstanceConfig(m, task.auth, pInfo)
		if err != nil {
			return task.setErrorStatus("creating instance config for machine %q: %v", m, err)
		}

		assocProvInfoAndMachCfg(pInfo, instanceCfg)

		var arch string
		if pInfo.Constraints.Arch != nil {
			arch = *pInfo.Constraints.Arch
		}

		possibleTools, err := task.toolsFinder.FindTools(
			jujuversion.Current,
			pInfo.Series,
			arch,
		)
		if err != nil {
			return task.setErrorStatus("cannot find tools for machine %q: %v", m, err)
		}

		startInstanceParams, err := constructStartInstanceParams(
			task.controllerUUID,
			m,
			instanceCfg,
			pInfo,
			possibleTools,
		)
		if err != nil {
			return task.setErrorStatus("cannot construct params for machine %q: %v", m, err)
		}

		if err := task.startMachine(m, pInfo, startInstanceParams); err != nil {
			return errors.Annotatef(err, "cannot start machine %v", m)
		}
	}
	return nil
}

func (task *provisionerTask) setErrorStatus(message string, machine *apiprovisioner.Machine, err error) error {
	logger.Errorf(message, machine, err)
	if err1 := machine.SetStatus(status.Error, err.Error(), nil); err1 != nil {
		// Something is wrong with this machine, better report it back.
		return errors.Annotatef(err1, "cannot set error status for machine %q", machine)
	}
	return nil
}

func (task *provisionerTask) startMachine(
	machine *apiprovisioner.Machine,
	provisioningInfo *params.ProvisioningInfo,
	startInstanceParams environs.StartInstanceParams,
) error {
	var result *environs.StartInstanceResult
	for attemptsLeft := task.retryStartInstanceStrategy.retryCount; attemptsLeft >= 0; attemptsLeft-- {
		attemptResult, err := task.broker.StartInstance(startInstanceParams)
		if err == nil {
			result = attemptResult
			break
		} else if attemptsLeft <= 0 {
			// Set the state to error, so the machine will be skipped
			// next time until the error is resolved, but don't return
			// an error; just keep going with the other machines.
			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
		}

		logger.Warningf("%v", errors.Annotate(err, "starting instance"))
		retryMsg := fmt.Sprintf("will retry to start instance in %v", task.retryStartInstanceStrategy.retryDelay)
		if err2 := machine.SetStatus(status.Pending, retryMsg, nil); err2 != nil {
			logger.Errorf("%v", err2)
		}
		logger.Infof(retryMsg)

		select {
		case <-task.catacomb.Dying():
			return task.catacomb.ErrDying()
		case <-time.After(task.retryStartInstanceStrategy.retryDelay):
		}
	}

	networkConfig := networkingcommon.NetworkConfigFromInterfaceInfo(result.NetworkInfo)
	volumes := volumesToAPIserver(result.Volumes)
	volumeNameToAttachmentInfo := volumeAttachmentsToAPIserver(result.VolumeAttachments)

	if err := machine.SetInstanceInfo(
		result.Instance.Id(),
		startInstanceParams.InstanceConfig.MachineNonce,
		result.Hardware,
		networkConfig,
		volumes,
		volumeNameToAttachmentInfo,
	); err != nil {
		// We need to stop the instance right away here, set error status and go on.
		if err2 := task.setErrorStatus("cannot register instance for machine %v: %v", machine, err); err2 != nil {
			logger.Errorf("%v", errors.Annotate(err2, "cannot set machine's status"))
		}
		if err2 := task.broker.StopInstances(result.Instance.Id()); err2 != nil {
			logger.Errorf("%v", errors.Annotate(err2, "after failing to set instance info"))
		}
		return errors.Annotate(err, "cannot set instance info")
	}

	logger.Infof(
		"started machine %s as instance %s with hardware %q, network config %+v, volumes %v, volume attachments %v, subnets to zones %v",
		machine,
		result.Instance.Id(),
		result.Hardware,
		networkConfig,
		volumes,
		volumeNameToAttachmentInfo,
		startInstanceParams.SubnetsToZones,
	)
	return nil
}

type provisioningInfo struct {
	Constraints    constraints.Value
	Series         string
	Placement      string
	InstanceConfig *instancecfg.InstanceConfig
	SubnetsToZones map[string][]string
}

func assocProvInfoAndMachCfg(
	provInfo *params.ProvisioningInfo,
	instanceConfig *instancecfg.InstanceConfig,
) *provisioningInfo {
	return &provisioningInfo{
		Constraints:    provInfo.Constraints,
		Series:         provInfo.Series,
		Placement:      provInfo.Placement,
		InstanceConfig: instanceConfig,
		SubnetsToZones: provInfo.SubnetsToZones,
	}
}

func volumesToAPIserver(volumes []storage.Volume) []params.Volume {
	result := make([]params.Volume, len(volumes))
	for i, v := range volumes {
		result[i] = params.Volume{
			v.Tag.String(),
			params.VolumeInfo{
				v.VolumeId,
				v.HardwareId,
				v.Size,
				v.Persistent,
			},
		}
	}
	return result
}

func volumeAttachmentsToAPIserver(attachments []storage.VolumeAttachment) map[string]params.VolumeAttachmentInfo {
	result := make(map[string]params.VolumeAttachmentInfo)
	for _, a := range attachments {
		result[a.Volume.String()] = params.VolumeAttachmentInfo{
			a.DeviceName,
			a.DeviceLink,
			a.BusAddress,
			a.ReadOnly,
		}
	}
	return result
}