github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/worker/provisioner/provisioner_task.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner
     5  
     6  import (
     7  	"fmt"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/utils"
    12  	"gopkg.in/juju/names.v2"
    13  
    14  	apiprovisioner "github.com/juju/juju/api/provisioner"
    15  	"github.com/juju/juju/apiserver/common/networkingcommon"
    16  	"github.com/juju/juju/apiserver/params"
    17  	"github.com/juju/juju/cloudconfig/instancecfg"
    18  	"github.com/juju/juju/constraints"
    19  	"github.com/juju/juju/controller"
    20  	"github.com/juju/juju/controller/authentication"
    21  	"github.com/juju/juju/environs"
    22  	"github.com/juju/juju/environs/config"
    23  	"github.com/juju/juju/environs/imagemetadata"
    24  	"github.com/juju/juju/environs/simplestreams"
    25  	"github.com/juju/juju/instance"
    26  	"github.com/juju/juju/network"
    27  	"github.com/juju/juju/state"
    28  	"github.com/juju/juju/state/multiwatcher"
    29  	"github.com/juju/juju/status"
    30  	"github.com/juju/juju/storage"
    31  	coretools "github.com/juju/juju/tools"
    32  	jujuversion "github.com/juju/juju/version"
    33  	"github.com/juju/juju/watcher"
    34  	"github.com/juju/juju/worker"
    35  	"github.com/juju/juju/worker/catacomb"
    36  	"github.com/juju/juju/wrench"
    37  	"github.com/juju/version"
    38  )
    39  
// ProvisionerTask is the interface returned by NewProvisionerTask. It is
// a worker.Worker that additionally allows its harvest mode to be
// changed while it is running.
type ProvisionerTask interface {
	worker.Worker

	// SetHarvestMode sets a flag to indicate how the provisioner task
	// should harvest machines. See config.HarvestMode for
	// documentation of behavior.
	SetHarvestMode(mode config.HarvestMode)
}
    48  
// MachineGetter is the source of machine information used by the
// provisioner task.
type MachineGetter interface {
	// Machine returns the machine identified by the given tag.
	Machine(names.MachineTag) (*apiprovisioner.Machine, error)
	// MachinesWithTransientErrors returns machines together with their
	// status results. The task treats the two slices as parallel: the
	// machine at index i is paired with the status result at index i.
	MachinesWithTransientErrors() ([]*apiprovisioner.Machine, []params.StatusResult, error)
}
    53  
// ToolsFinder is an interface used for finding tools to run on
// provisioned instances.
type ToolsFinder interface {
	// FindTools returns a list of tools matching the specified
	// version, series, and architecture. If arch is empty, the
	// implementation is expected to use a well documented default.
	// The provisioner task passes an empty arch when the machine's
	// constraints do not name an architecture.
	FindTools(version version.Number, series string, arch string) (coretools.List, error)
}
    62  
    63  func NewProvisionerTask(
    64  	controllerUUID string,
    65  	machineTag names.MachineTag,
    66  	harvestMode config.HarvestMode,
    67  	machineGetter MachineGetter,
    68  	toolsFinder ToolsFinder,
    69  	machineWatcher watcher.StringsWatcher,
    70  	retryWatcher watcher.NotifyWatcher,
    71  	broker environs.InstanceBroker,
    72  	auth authentication.AuthenticationProvider,
    73  	imageStream string,
    74  	retryStartInstanceStrategy RetryStrategy,
    75  ) (ProvisionerTask, error) {
    76  	machineChanges := machineWatcher.Changes()
    77  	workers := []worker.Worker{machineWatcher}
    78  	var retryChanges watcher.NotifyChannel
    79  	if retryWatcher != nil {
    80  		retryChanges = retryWatcher.Changes()
    81  		workers = append(workers, retryWatcher)
    82  	}
    83  	task := &provisionerTask{
    84  		controllerUUID:             controllerUUID,
    85  		machineTag:                 machineTag,
    86  		machineGetter:              machineGetter,
    87  		toolsFinder:                toolsFinder,
    88  		machineChanges:             machineChanges,
    89  		retryChanges:               retryChanges,
    90  		broker:                     broker,
    91  		auth:                       auth,
    92  		harvestMode:                harvestMode,
    93  		harvestModeChan:            make(chan config.HarvestMode, 1),
    94  		machines:                   make(map[string]*apiprovisioner.Machine),
    95  		imageStream:                imageStream,
    96  		retryStartInstanceStrategy: retryStartInstanceStrategy,
    97  	}
    98  	err := catacomb.Invoke(catacomb.Plan{
    99  		Site: &task.catacomb,
   100  		Work: task.loop,
   101  		Init: workers,
   102  	})
   103  	if err != nil {
   104  		return nil, errors.Trace(err)
   105  	}
   106  	return task, nil
   107  }
   108  
// provisionerTask is the ProvisionerTask implementation created by
// NewProvisionerTask. Its loop method reacts to machine changes, retry
// notifications and harvest mode changes; the instances and machines
// maps hold the task's current view of the broker and of the machines
// it has been told about.
type provisionerTask struct {
	controllerUUID             string
	machineTag                 names.MachineTag
	machineGetter              MachineGetter
	toolsFinder                ToolsFinder
	machineChanges             watcher.StringsChannel
	retryChanges               watcher.NotifyChannel
	broker                     environs.InstanceBroker
	catacomb                   catacomb.Catacomb
	auth                       authentication.AuthenticationProvider
	imageStream                string
	harvestMode                config.HarvestMode
	harvestModeChan            chan config.HarvestMode
	retryStartInstanceStrategy RetryStrategy
	// instance id -> instance; rebuilt from broker.AllInstances on every
	// call to populateMachineMaps.
	instances map[instance.Id]instance.Instance
	// machine id -> machine
	machines map[string]*apiprovisioner.Machine
}
   128  
// Kill implements worker.Worker.Kill. It asks the task's catacomb to
// shut down, passing a nil error.
func (task *provisionerTask) Kill() {
	task.catacomb.Kill(nil)
}
   133  
// Wait implements worker.Worker.Wait. It returns whatever the task's
// catacomb reports from its own Wait.
func (task *provisionerTask) Wait() error {
	return task.catacomb.Wait()
}
   138  
   139  func (task *provisionerTask) loop() error {
   140  
   141  	// Don't allow the harvesting mode to change until we have read at
   142  	// least one set of changes, which will populate the task.machines
   143  	// map. Otherwise we will potentially see all legitimate instances
   144  	// as unknown.
   145  	var harvestModeChan chan config.HarvestMode
   146  
   147  	// When the watcher is started, it will have the initial changes be all
   148  	// the machines that are relevant. Also, since this is available straight
   149  	// away, we know there will be some changes right off the bat.
   150  	for {
   151  		select {
   152  		case <-task.catacomb.Dying():
   153  			logger.Infof("Shutting down provisioner task %s", task.machineTag)
   154  			return task.catacomb.ErrDying()
   155  		case ids, ok := <-task.machineChanges:
   156  			if !ok {
   157  				return errors.New("machine watcher closed channel")
   158  			}
   159  			if err := task.processMachines(ids); err != nil {
   160  				return errors.Annotate(err, "failed to process updated machines")
   161  			}
   162  
   163  			// We've seen a set of changes. Enable modification of
   164  			// harvesting mode.
   165  			harvestModeChan = task.harvestModeChan
   166  		case harvestMode := <-harvestModeChan:
   167  			if harvestMode == task.harvestMode {
   168  				break
   169  			}
   170  			logger.Infof("harvesting mode changed to %s", harvestMode)
   171  			task.harvestMode = harvestMode
   172  			if harvestMode.HarvestUnknown() {
   173  				logger.Infof("harvesting unknown machines")
   174  				if err := task.processMachines(nil); err != nil {
   175  					return errors.Annotate(err, "failed to process machines after safe mode disabled")
   176  				}
   177  			}
   178  		case <-task.retryChanges:
   179  			if err := task.processMachinesWithTransientErrors(); err != nil {
   180  				return errors.Annotate(err, "failed to process machines with transient errors")
   181  			}
   182  		}
   183  	}
   184  }
   185  
// SetHarvestMode implements ProvisionerTask.SetHarvestMode().
// The mode is handed to the main loop through harvestModeChan rather
// than written to the task directly. The call blocks until the mode has
// been sent on that channel or the task's catacomb is dying, in which
// case the mode is dropped.
func (task *provisionerTask) SetHarvestMode(mode config.HarvestMode) {
	select {
	case task.harvestModeChan <- mode:
	case <-task.catacomb.Dying():
	}
}
   193  
   194  func (task *provisionerTask) processMachinesWithTransientErrors() error {
   195  	machines, statusResults, err := task.machineGetter.MachinesWithTransientErrors()
   196  	if err != nil {
   197  		return nil
   198  	}
   199  	logger.Tracef("processMachinesWithTransientErrors(%v)", statusResults)
   200  	var pending []*apiprovisioner.Machine
   201  	for i, statusResult := range statusResults {
   202  		if statusResult.Error != nil {
   203  			logger.Errorf("cannot retry provisioning of machine %q: %v", statusResult.Id, statusResult.Error)
   204  			continue
   205  		}
   206  		machine := machines[i]
   207  		if err := machine.SetStatus(status.Pending, "", nil); err != nil {
   208  			logger.Errorf("cannot reset status of machine %q: %v", statusResult.Id, err)
   209  			continue
   210  		}
   211  		task.machines[machine.Tag().String()] = machine
   212  		pending = append(pending, machine)
   213  	}
   214  	return task.startMachines(pending)
   215  }
   216  
   217  func (task *provisionerTask) processMachines(ids []string) error {
   218  	logger.Tracef("processMachines(%v)", ids)
   219  
   220  	// Populate the tasks maps of current instances and machines.
   221  	if err := task.populateMachineMaps(ids); err != nil {
   222  		return err
   223  	}
   224  
   225  	// Find machines without an instance id or that are dead
   226  	pending, dead, maintain, err := task.pendingOrDeadOrMaintain(ids)
   227  	if err != nil {
   228  		return err
   229  	}
   230  
   231  	// Stop all machines that are dead
   232  	stopping := task.instancesForMachines(dead)
   233  
   234  	// Find running instances that have no machines associated
   235  	unknown, err := task.findUnknownInstances(stopping)
   236  	if err != nil {
   237  		return err
   238  	}
   239  	if !task.harvestMode.HarvestUnknown() {
   240  		logger.Infof(
   241  			"%s is set to %s; unknown instances not stopped %v",
   242  			config.ProvisionerHarvestModeKey,
   243  			task.harvestMode.String(),
   244  			instanceIds(unknown),
   245  		)
   246  		unknown = nil
   247  	}
   248  	if task.harvestMode.HarvestNone() || !task.harvestMode.HarvestDestroyed() {
   249  		logger.Infof(
   250  			`%s is set to "%s"; will not harvest %s`,
   251  			config.ProvisionerHarvestModeKey,
   252  			task.harvestMode.String(),
   253  			instanceIds(stopping),
   254  		)
   255  		stopping = nil
   256  	}
   257  
   258  	if len(stopping) > 0 {
   259  		logger.Infof("stopping known instances %v", stopping)
   260  	}
   261  	if len(unknown) > 0 {
   262  		logger.Infof("stopping unknown instances %v", instanceIds(unknown))
   263  	}
   264  	// It's important that we stop unknown instances before starting
   265  	// pending ones, because if we start an instance and then fail to
   266  	// set its InstanceId on the machine we don't want to start a new
   267  	// instance for the same machine ID.
   268  	if err := task.stopInstances(append(stopping, unknown...)); err != nil {
   269  		return err
   270  	}
   271  
   272  	// Remove any dead machines from state.
   273  	for _, machine := range dead {
   274  		logger.Infof("removing dead machine %q", machine)
   275  		if err := machine.MarkForRemoval(); err != nil {
   276  			logger.Errorf("failed to remove dead machine %q", machine)
   277  		}
   278  		delete(task.machines, machine.Id())
   279  	}
   280  
   281  	// Any machines that require maintenance get pinged
   282  	task.maintainMachines(maintain)
   283  
   284  	// Start an instance for the pending ones
   285  	return task.startMachines(pending)
   286  }
   287  
   288  func instanceIds(instances []instance.Instance) []string {
   289  	ids := make([]string, 0, len(instances))
   290  	for _, inst := range instances {
   291  		ids = append(ids, string(inst.Id()))
   292  	}
   293  	return ids
   294  }
   295  
   296  // populateMachineMaps updates task.instances. Also updates
   297  // task.machines map if a list of IDs is given.
   298  func (task *provisionerTask) populateMachineMaps(ids []string) error {
   299  	task.instances = make(map[instance.Id]instance.Instance)
   300  
   301  	instances, err := task.broker.AllInstances()
   302  	if err != nil {
   303  		return errors.Annotate(err, "failed to get all instances from broker")
   304  	}
   305  	for _, i := range instances {
   306  		task.instances[i.Id()] = i
   307  	}
   308  
   309  	// Update the machines map with new data for each of the machines in the
   310  	// change list.
   311  	// TODO(thumper): update for API server later to get all machines in one go.
   312  	for _, id := range ids {
   313  		machineTag := names.NewMachineTag(id)
   314  		machine, err := task.machineGetter.Machine(machineTag)
   315  		switch {
   316  		case params.IsCodeNotFoundOrCodeUnauthorized(err):
   317  			logger.Debugf("machine %q not found in state", id)
   318  			delete(task.machines, id)
   319  		case err == nil:
   320  			task.machines[id] = machine
   321  		default:
   322  			return errors.Annotatef(err, "failed to get machine %v", id)
   323  		}
   324  	}
   325  	return nil
   326  }
   327  
   328  // pendingOrDead looks up machines with ids and returns those that do not
   329  // have an instance id assigned yet, and also those that are dead.
   330  func (task *provisionerTask) pendingOrDeadOrMaintain(ids []string) (pending, dead, maintain []*apiprovisioner.Machine, err error) {
   331  	for _, id := range ids {
   332  		machine, found := task.machines[id]
   333  		if !found {
   334  			logger.Infof("machine %q not found", id)
   335  			continue
   336  		}
   337  		var classification MachineClassification
   338  		classification, err = classifyMachine(machine)
   339  		if err != nil {
   340  			return // return the error
   341  		}
   342  		switch classification {
   343  		case Pending:
   344  			pending = append(pending, machine)
   345  		case Dead:
   346  			dead = append(dead, machine)
   347  		case Maintain:
   348  			maintain = append(maintain, machine)
   349  		}
   350  	}
   351  	logger.Tracef("pending machines: %v", pending)
   352  	logger.Tracef("dead machines: %v", dead)
   353  	return
   354  }
   355  
// ClassifiableMachine is the set of machine methods that
// classifyMachine relies on to decide what should happen to a machine.
type ClassifiableMachine interface {
	Life() params.Life
	InstanceId() (instance.Id, error)
	EnsureDead() error
	Status() (status.Status, string, error)
	Id() string
}
   363  
// MachineClassification is the category classifyMachine assigns to a
// machine; it determines what the provisioner task does with it.
type MachineClassification string

// The classifications produced by classifyMachine:
//   - None: no action is taken for the machine.
//   - Pending: the machine is not provisioned and its status is pending,
//     so an instance should be started for it.
//   - Dead: the machine is dead, or was dying without an instance and
//     has been made dead.
//   - Maintain: the machine already has an instance and its id has a
//     non-empty container type.
const (
	None     MachineClassification = "none"
	Pending  MachineClassification = "Pending"
	Dead     MachineClassification = "Dead"
	Maintain MachineClassification = "Maintain"
)
   372  
   373  func classifyMachine(machine ClassifiableMachine) (
   374  	MachineClassification, error) {
   375  	switch machine.Life() {
   376  	case params.Dying:
   377  		if _, err := machine.InstanceId(); err == nil {
   378  			return None, nil
   379  		} else if !params.IsCodeNotProvisioned(err) {
   380  			return None, errors.Annotatef(err, "failed to load dying machine id:%s, details:%v", machine.Id(), machine)
   381  		}
   382  		logger.Infof("killing dying, unprovisioned machine %q", machine)
   383  		if err := machine.EnsureDead(); err != nil {
   384  			return None, errors.Annotatef(err, "failed to ensure machine dead id:%s, details:%v", machine.Id(), machine)
   385  		}
   386  		fallthrough
   387  	case params.Dead:
   388  		return Dead, nil
   389  	}
   390  	instId, err := machine.InstanceId()
   391  	if err != nil {
   392  		if !params.IsCodeNotProvisioned(err) {
   393  			return None, errors.Annotatef(err, "failed to load machine id:%s, details:%v", machine.Id(), machine)
   394  		}
   395  		machineStatus, _, err := machine.Status()
   396  		if err != nil {
   397  			logger.Infof("cannot get machine id:%s, details:%v, err:%v", machine.Id(), machine, err)
   398  			return None, nil
   399  		}
   400  		if machineStatus == status.Pending {
   401  			logger.Infof("found machine pending provisioning id:%s, details:%v", machine.Id(), machine)
   402  			return Pending, nil
   403  		}
   404  		return None, nil
   405  	}
   406  	logger.Infof("machine %s already started as instance %q", machine.Id(), instId)
   407  
   408  	if state.ContainerTypeFromId(machine.Id()) != "" {
   409  		return Maintain, nil
   410  	}
   411  	return None, nil
   412  }
   413  
   414  // findUnknownInstances finds instances which are not associated with a machine.
   415  func (task *provisionerTask) findUnknownInstances(stopping []instance.Instance) ([]instance.Instance, error) {
   416  	// Make a copy of the instances we know about.
   417  	instances := make(map[instance.Id]instance.Instance)
   418  	for k, v := range task.instances {
   419  		instances[k] = v
   420  	}
   421  
   422  	for _, m := range task.machines {
   423  		instId, err := m.InstanceId()
   424  		switch {
   425  		case err == nil:
   426  			delete(instances, instId)
   427  		case params.IsCodeNotProvisioned(err):
   428  		case params.IsCodeNotFoundOrCodeUnauthorized(err):
   429  		default:
   430  			return nil, err
   431  		}
   432  	}
   433  	// Now remove all those instances that we are stopping already as we
   434  	// know about those and don't want to include them in the unknown list.
   435  	for _, inst := range stopping {
   436  		delete(instances, inst.Id())
   437  	}
   438  	var unknown []instance.Instance
   439  	for _, inst := range instances {
   440  		unknown = append(unknown, inst)
   441  	}
   442  	return unknown, nil
   443  }
   444  
   445  // instancesForMachines returns a list of instance.Instance that represent
   446  // the list of machines running in the provider. Missing machines are
   447  // omitted from the list.
   448  func (task *provisionerTask) instancesForMachines(machines []*apiprovisioner.Machine) []instance.Instance {
   449  	var instances []instance.Instance
   450  	for _, machine := range machines {
   451  		instId, err := machine.InstanceId()
   452  		if err == nil {
   453  			instance, found := task.instances[instId]
   454  			// If the instance is not found we can't stop it.
   455  			if found {
   456  				instances = append(instances, instance)
   457  			}
   458  		}
   459  	}
   460  	return instances
   461  }
   462  
   463  func (task *provisionerTask) stopInstances(instances []instance.Instance) error {
   464  	// Although calling StopInstance with an empty slice should produce no change in the
   465  	// provider, environs like dummy do not consider this a noop.
   466  	if len(instances) == 0 {
   467  		return nil
   468  	}
   469  	if wrench.IsActive("provisioner", "stop-instances") {
   470  		return errors.New("wrench in the works")
   471  	}
   472  
   473  	ids := make([]instance.Id, len(instances))
   474  	for i, inst := range instances {
   475  		ids[i] = inst.Id()
   476  	}
   477  	if err := task.broker.StopInstances(ids...); err != nil {
   478  		return errors.Annotate(err, "broker failed to stop instances")
   479  	}
   480  	return nil
   481  }
   482  
   483  func (task *provisionerTask) constructInstanceConfig(
   484  	machine *apiprovisioner.Machine,
   485  	auth authentication.AuthenticationProvider,
   486  	pInfo *params.ProvisioningInfo,
   487  ) (*instancecfg.InstanceConfig, error) {
   488  
   489  	stateInfo, apiInfo, err := auth.SetupAuthentication(machine)
   490  	if err != nil {
   491  		return nil, errors.Annotate(err, "failed to setup authentication")
   492  	}
   493  
   494  	// Generated a nonce for the new instance, with the format: "machine-#:UUID".
   495  	// The first part is a badge, specifying the tag of the machine the provisioner
   496  	// is running on, while the second part is a random UUID.
   497  	uuid, err := utils.NewUUID()
   498  	if err != nil {
   499  		return nil, errors.Annotate(err, "failed to generate a nonce for machine "+machine.Id())
   500  	}
   501  
   502  	nonce := fmt.Sprintf("%s:%s", task.machineTag, uuid)
   503  	instanceConfig, err := instancecfg.NewInstanceConfig(
   504  		names.NewControllerTag(controller.Config(pInfo.ControllerConfig).ControllerUUID()),
   505  		machine.Id(),
   506  		nonce,
   507  		task.imageStream,
   508  		pInfo.Series,
   509  		apiInfo,
   510  	)
   511  	if err != nil {
   512  		return nil, errors.Trace(err)
   513  	}
   514  
   515  	instanceConfig.Tags = pInfo.Tags
   516  	if len(pInfo.Jobs) > 0 {
   517  		instanceConfig.Jobs = pInfo.Jobs
   518  	}
   519  
   520  	if multiwatcher.AnyJobNeedsState(instanceConfig.Jobs...) {
   521  		publicKey, err := simplestreams.UserPublicSigningKey()
   522  		if err != nil {
   523  			return nil, err
   524  		}
   525  		instanceConfig.Controller = &instancecfg.ControllerConfig{
   526  			PublicImageSigningKey: publicKey,
   527  			MongoInfo:             stateInfo,
   528  		}
   529  		instanceConfig.Controller.Config = make(map[string]interface{})
   530  		for k, v := range pInfo.ControllerConfig {
   531  			instanceConfig.Controller.Config[k] = v
   532  		}
   533  	}
   534  
   535  	return instanceConfig, nil
   536  }
   537  
   538  func constructStartInstanceParams(
   539  	controllerUUID string,
   540  	machine *apiprovisioner.Machine,
   541  	instanceConfig *instancecfg.InstanceConfig,
   542  	provisioningInfo *params.ProvisioningInfo,
   543  	possibleTools coretools.List,
   544  ) (environs.StartInstanceParams, error) {
   545  
   546  	volumes := make([]storage.VolumeParams, len(provisioningInfo.Volumes))
   547  	for i, v := range provisioningInfo.Volumes {
   548  		volumeTag, err := names.ParseVolumeTag(v.VolumeTag)
   549  		if err != nil {
   550  			return environs.StartInstanceParams{}, errors.Trace(err)
   551  		}
   552  		if v.Attachment == nil {
   553  			return environs.StartInstanceParams{}, errors.Errorf("volume params missing attachment")
   554  		}
   555  		machineTag, err := names.ParseMachineTag(v.Attachment.MachineTag)
   556  		if err != nil {
   557  			return environs.StartInstanceParams{}, errors.Trace(err)
   558  		}
   559  		if machineTag != machine.Tag() {
   560  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params has invalid machine tag")
   561  		}
   562  		if v.Attachment.InstanceId != "" {
   563  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params specifies instance ID")
   564  		}
   565  		volumes[i] = storage.VolumeParams{
   566  			volumeTag,
   567  			v.Size,
   568  			storage.ProviderType(v.Provider),
   569  			v.Attributes,
   570  			v.Tags,
   571  			&storage.VolumeAttachmentParams{
   572  				AttachmentParams: storage.AttachmentParams{
   573  					Machine:  machineTag,
   574  					ReadOnly: v.Attachment.ReadOnly,
   575  				},
   576  				Volume: volumeTag,
   577  			},
   578  		}
   579  	}
   580  
   581  	var subnetsToZones map[network.Id][]string
   582  	if provisioningInfo.SubnetsToZones != nil {
   583  		// Convert subnet provider ids from string to network.Id.
   584  		subnetsToZones = make(map[network.Id][]string, len(provisioningInfo.SubnetsToZones))
   585  		for providerId, zones := range provisioningInfo.SubnetsToZones {
   586  			subnetsToZones[network.Id(providerId)] = zones
   587  		}
   588  	}
   589  
   590  	var endpointBindings map[string]network.Id
   591  	if len(provisioningInfo.EndpointBindings) != 0 {
   592  		endpointBindings = make(map[string]network.Id)
   593  		for endpoint, space := range provisioningInfo.EndpointBindings {
   594  			endpointBindings[endpoint] = network.Id(space)
   595  		}
   596  	}
   597  	possibleImageMetadata := make([]*imagemetadata.ImageMetadata, len(provisioningInfo.ImageMetadata))
   598  	for i, metadata := range provisioningInfo.ImageMetadata {
   599  		possibleImageMetadata[i] = &imagemetadata.ImageMetadata{
   600  			Id:          metadata.ImageId,
   601  			Arch:        metadata.Arch,
   602  			RegionAlias: metadata.Region,
   603  			RegionName:  metadata.Region,
   604  			Storage:     metadata.RootStorageType,
   605  			Stream:      metadata.Stream,
   606  			VirtType:    metadata.VirtType,
   607  			Version:     metadata.Version,
   608  		}
   609  	}
   610  
   611  	return environs.StartInstanceParams{
   612  		ControllerUUID:    controllerUUID,
   613  		Constraints:       provisioningInfo.Constraints,
   614  		Tools:             possibleTools,
   615  		InstanceConfig:    instanceConfig,
   616  		Placement:         provisioningInfo.Placement,
   617  		DistributionGroup: machine.DistributionGroup,
   618  		Volumes:           volumes,
   619  		SubnetsToZones:    subnetsToZones,
   620  		EndpointBindings:  endpointBindings,
   621  		ImageMetadata:     possibleImageMetadata,
   622  		StatusCallback:    machine.SetInstanceStatus,
   623  	}, nil
   624  }
   625  
   626  func (task *provisionerTask) maintainMachines(machines []*apiprovisioner.Machine) error {
   627  	for _, m := range machines {
   628  		logger.Infof("maintainMachines: %v", m)
   629  		startInstanceParams := environs.StartInstanceParams{}
   630  		startInstanceParams.InstanceConfig = &instancecfg.InstanceConfig{}
   631  		startInstanceParams.InstanceConfig.MachineId = m.Id()
   632  		if err := task.broker.MaintainInstance(startInstanceParams); err != nil {
   633  			return errors.Annotatef(err, "cannot maintain machine %v", m)
   634  		}
   635  	}
   636  	return nil
   637  }
   638  
   639  func (task *provisionerTask) startMachines(machines []*apiprovisioner.Machine) error {
   640  	for _, m := range machines {
   641  
   642  		pInfo, err := m.ProvisioningInfo()
   643  		if err != nil {
   644  			return task.setErrorStatus("fetching provisioning info for machine %q: %v", m, err)
   645  		}
   646  
   647  		instanceCfg, err := task.constructInstanceConfig(m, task.auth, pInfo)
   648  		if err != nil {
   649  			return task.setErrorStatus("creating instance config for machine %q: %v", m, err)
   650  		}
   651  
   652  		assocProvInfoAndMachCfg(pInfo, instanceCfg)
   653  
   654  		var arch string
   655  		if pInfo.Constraints.Arch != nil {
   656  			arch = *pInfo.Constraints.Arch
   657  		}
   658  
   659  		possibleTools, err := task.toolsFinder.FindTools(
   660  			jujuversion.Current,
   661  			pInfo.Series,
   662  			arch,
   663  		)
   664  		if err != nil {
   665  			return task.setErrorStatus("cannot find tools for machine %q: %v", m, err)
   666  		}
   667  
   668  		startInstanceParams, err := constructStartInstanceParams(
   669  			task.controllerUUID,
   670  			m,
   671  			instanceCfg,
   672  			pInfo,
   673  			possibleTools,
   674  		)
   675  		if err != nil {
   676  			return task.setErrorStatus("cannot construct params for machine %q: %v", m, err)
   677  		}
   678  
   679  		if err := task.startMachine(m, pInfo, startInstanceParams); err != nil {
   680  			return errors.Annotatef(err, "cannot start machine %v", m)
   681  		}
   682  	}
   683  	return nil
   684  }
   685  
   686  func (task *provisionerTask) setErrorStatus(message string, machine *apiprovisioner.Machine, err error) error {
   687  	logger.Errorf(message, machine, err)
   688  	if err1 := machine.SetStatus(status.Error, err.Error(), nil); err1 != nil {
   689  		// Something is wrong with this machine, better report it back.
   690  		return errors.Annotatef(err1, "cannot set error status for machine %q", machine)
   691  	}
   692  	return nil
   693  }
   694  
   695  func (task *provisionerTask) startMachine(
   696  	machine *apiprovisioner.Machine,
   697  	provisioningInfo *params.ProvisioningInfo,
   698  	startInstanceParams environs.StartInstanceParams,
   699  ) error {
   700  	var result *environs.StartInstanceResult
   701  	for attemptsLeft := task.retryStartInstanceStrategy.retryCount; attemptsLeft >= 0; attemptsLeft-- {
   702  		attemptResult, err := task.broker.StartInstance(startInstanceParams)
   703  		if err == nil {
   704  			result = attemptResult
   705  			break
   706  		} else if attemptsLeft <= 0 {
   707  			// Set the state to error, so the machine will be skipped
   708  			// next time until the error is resolved, but don't return
   709  			// an error; just keep going with the other machines.
   710  			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
   711  		}
   712  
   713  		logger.Warningf("%v", errors.Annotate(err, "starting instance"))
   714  		retryMsg := fmt.Sprintf("will retry to start instance in %v", task.retryStartInstanceStrategy.retryDelay)
   715  		if err2 := machine.SetStatus(status.Pending, retryMsg, nil); err2 != nil {
   716  			logger.Errorf("%v", err2)
   717  		}
   718  		logger.Infof(retryMsg)
   719  
   720  		select {
   721  		case <-task.catacomb.Dying():
   722  			return task.catacomb.ErrDying()
   723  		case <-time.After(task.retryStartInstanceStrategy.retryDelay):
   724  		}
   725  	}
   726  
   727  	networkConfig := networkingcommon.NetworkConfigFromInterfaceInfo(result.NetworkInfo)
   728  	volumes := volumesToAPIserver(result.Volumes)
   729  	volumeNameToAttachmentInfo := volumeAttachmentsToAPIserver(result.VolumeAttachments)
   730  
   731  	if err := machine.SetInstanceInfo(
   732  		result.Instance.Id(),
   733  		startInstanceParams.InstanceConfig.MachineNonce,
   734  		result.Hardware,
   735  		networkConfig,
   736  		volumes,
   737  		volumeNameToAttachmentInfo,
   738  	); err != nil {
   739  		// We need to stop the instance right away here, set error status and go on.
   740  		if err2 := task.setErrorStatus("cannot register instance for machine %v: %v", machine, err); err2 != nil {
   741  			logger.Errorf("%v", errors.Annotate(err2, "cannot set machine's status"))
   742  		}
   743  		if err2 := task.broker.StopInstances(result.Instance.Id()); err2 != nil {
   744  			logger.Errorf("%v", errors.Annotate(err2, "after failing to set instance info"))
   745  		}
   746  		return errors.Annotate(err, "cannot set instance info")
   747  	}
   748  
   749  	logger.Infof(
   750  		"started machine %s as instance %s with hardware %q, network config %+v, volumes %v, volume attachments %v, subnets to zones %v",
   751  		machine,
   752  		result.Instance.Id(),
   753  		result.Hardware,
   754  		networkConfig,
   755  		volumes,
   756  		volumeNameToAttachmentInfo,
   757  		startInstanceParams.SubnetsToZones,
   758  	)
   759  	return nil
   760  }
   761  
// provisioningInfo bundles selected fields of a params.ProvisioningInfo
// (constraints, series, placement and the subnet-to-zones mapping) with
// the instance config built for the same machine. Values of this type
// are produced by assocProvInfoAndMachCfg.
type provisioningInfo struct {
	Constraints    constraints.Value
	Series         string
	Placement      string
	InstanceConfig *instancecfg.InstanceConfig
	SubnetsToZones map[string][]string
}
   769  
   770  func assocProvInfoAndMachCfg(
   771  	provInfo *params.ProvisioningInfo,
   772  	instanceConfig *instancecfg.InstanceConfig,
   773  ) *provisioningInfo {
   774  	return &provisioningInfo{
   775  		Constraints:    provInfo.Constraints,
   776  		Series:         provInfo.Series,
   777  		Placement:      provInfo.Placement,
   778  		InstanceConfig: instanceConfig,
   779  		SubnetsToZones: provInfo.SubnetsToZones,
   780  	}
   781  }
   782  
   783  func volumesToAPIserver(volumes []storage.Volume) []params.Volume {
   784  	result := make([]params.Volume, len(volumes))
   785  	for i, v := range volumes {
   786  		result[i] = params.Volume{
   787  			v.Tag.String(),
   788  			params.VolumeInfo{
   789  				v.VolumeId,
   790  				v.HardwareId,
   791  				v.Size,
   792  				v.Persistent,
   793  			},
   794  		}
   795  	}
   796  	return result
   797  }
   798  
   799  func volumeAttachmentsToAPIserver(attachments []storage.VolumeAttachment) map[string]params.VolumeAttachmentInfo {
   800  	result := make(map[string]params.VolumeAttachmentInfo)
   801  	for _, a := range attachments {
   802  		result[a.Volume.String()] = params.VolumeAttachmentInfo{
   803  			a.DeviceName,
   804  			a.DeviceLink,
   805  			a.BusAddress,
   806  			a.ReadOnly,
   807  		}
   808  	}
   809  	return result
   810  }