github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/provisioner/provisioner_task.go

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner
     5  
     6  import (
     7  	stdcontext "context"
     8  	"fmt"
     9  	"math/rand"
    10  	"sort"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/juju/collections/set"
    15  	"github.com/juju/errors"
    16  	"github.com/juju/names/v5"
    17  	"github.com/juju/utils/v3"
    18  	"github.com/juju/version/v2"
    19  	"github.com/juju/worker/v3"
    20  	"github.com/juju/worker/v3/catacomb"
    21  
    22  	apiprovisioner "github.com/juju/juju/api/agent/provisioner"
    23  	"github.com/juju/juju/cloudconfig/instancecfg"
    24  	"github.com/juju/juju/container"
    25  	"github.com/juju/juju/controller"
    26  	"github.com/juju/juju/controller/authentication"
    27  	"github.com/juju/juju/core/arch"
    28  	corebase "github.com/juju/juju/core/base"
    29  	"github.com/juju/juju/core/constraints"
    30  	"github.com/juju/juju/core/instance"
    31  	"github.com/juju/juju/core/life"
    32  	"github.com/juju/juju/core/lxdprofile"
    33  	"github.com/juju/juju/core/network"
    34  	"github.com/juju/juju/core/status"
    35  	"github.com/juju/juju/core/watcher"
    36  	"github.com/juju/juju/core/workerpool"
    37  	"github.com/juju/juju/environs"
    38  	"github.com/juju/juju/environs/config"
    39  	"github.com/juju/juju/environs/context"
    40  	"github.com/juju/juju/environs/imagemetadata"
    41  	"github.com/juju/juju/environs/instances"
    42  	"github.com/juju/juju/environs/simplestreams"
    43  	providercommon "github.com/juju/juju/provider/common"
    44  	"github.com/juju/juju/rpc/params"
    45  	"github.com/juju/juju/storage"
    46  	coretools "github.com/juju/juju/tools"
    47  	"github.com/juju/juju/worker/common"
    48  	"github.com/juju/juju/wrench"
    49  )
    50  
    51  type ProvisionerTask interface {
    52  	worker.Worker
    53  
    54  	// SetHarvestMode sets a flag to indicate how the provisioner task
    55  	// should harvest machines. See config.HarvestMode for
    56  	// documentation of behavior.
    57  	SetHarvestMode(mode config.HarvestMode)
    58  
    59  	// SetNumProvisionWorkers resizes the pool of provision workers.
    60  	SetNumProvisionWorkers(numWorkers int)
    61  }
    62  
    63  // TaskAPI describes API methods required by a ProvisionerTask.
    64  type TaskAPI interface {
    65  	Machines(...names.MachineTag) ([]apiprovisioner.MachineResult, error)
    66  	MachinesWithTransientErrors() ([]apiprovisioner.MachineStatusResult, error)
    67  	ProvisioningInfo(machineTags []names.MachineTag) (params.ProvisioningInfoResults, error)
    68  }
    69  
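        // DistributionGroupFinder provides the distribution groups (sets of
        // related machine IDs) used to spread machines across availability zones.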
    70  type DistributionGroupFinder interface {
    71  	DistributionGroupByMachineId(...names.MachineTag) ([]apiprovisioner.DistributionGroupResult, error)
    72  }
    73  
    74  // ToolsFinder is an interface used for finding tools to run on
    75  // provisioned instances.
    76  type ToolsFinder interface {
    77  	// FindTools returns a list of tools matching the specified
    78  	// version, os, and architecture. If arch is empty, the
    79  	// implementation is expected to use a well documented default.
    80  	FindTools(version version.Number, os string, arch string) (coretools.List, error)
    81  }
    82  
    83  // TaskConfig holds the initialisation data for a ProvisionerTask instance.
    84  type TaskConfig struct {
    85  	ControllerUUID             string
    86  	HostTag                    names.Tag
    87  	Logger                     Logger
    88  	HarvestMode                config.HarvestMode
    89  	TaskAPI                    TaskAPI
    90  	DistributionGroupFinder    DistributionGroupFinder
    91  	ToolsFinder                ToolsFinder
    92  	MachineWatcher             watcher.StringsWatcher
    93  	RetryWatcher               watcher.NotifyWatcher
    94  	Broker                     environs.InstanceBroker
    95  	Auth                       authentication.AuthenticationProvider
    96  	ImageStream                string
    97  	RetryStartInstanceStrategy RetryStrategy
    98  	CloudCallContextFunc       common.CloudCallContextFunc
    99  	NumProvisionWorkers        int
   100  	EventProcessedCb           func(string)
   101  }
   102  
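        // NewProvisionerTask returns a ProvisionerTask worker that provisions and
        // removes instances for the machines reported by the configured watchers.
        // The supplied watchers are managed by the worker's catacomb.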
   103  func NewProvisionerTask(cfg TaskConfig) (ProvisionerTask, error) {
   104  	machineChanges := cfg.MachineWatcher.Changes()
   105  	workers := []worker.Worker{cfg.MachineWatcher}
   106  	var retryChanges watcher.NotifyChannel
   107  	if cfg.RetryWatcher != nil {
   108  		retryChanges = cfg.RetryWatcher.Changes()
   109  		workers = append(workers, cfg.RetryWatcher)
   110  	}
   111  	task := &provisionerTask{
   112  		controllerUUID:             cfg.ControllerUUID,
   113  		hostTag:                    cfg.HostTag,
   114  		logger:                     cfg.Logger,
   115  		taskAPI:                    cfg.TaskAPI,
   116  		distributionGroupFinder:    cfg.DistributionGroupFinder,
   117  		toolsFinder:                cfg.ToolsFinder,
   118  		machineChanges:             machineChanges,
   119  		retryChanges:               retryChanges,
   120  		broker:                     cfg.Broker,
   121  		auth:                       cfg.Auth,
   122  		harvestMode:                cfg.HarvestMode,
   123  		harvestModeChan:            make(chan config.HarvestMode, 1),
   124  		machines:                   make(map[string]apiprovisioner.MachineProvisioner),
   125  		machinesStarting:           make(map[string]bool),
   126  		machinesStopDeferred:       make(map[string]bool),
   127  		machinesStopping:           make(map[string]bool),
   128  		availabilityZoneMachines:   make([]*AvailabilityZoneMachine, 0),
   129  		imageStream:                cfg.ImageStream,
   130  		retryStartInstanceStrategy: cfg.RetryStartInstanceStrategy,
   131  		cloudCallCtxFunc:           cfg.CloudCallContextFunc,
   132  		wp:                         workerpool.NewWorkerPool(cfg.Logger, cfg.NumProvisionWorkers),
   133  		wpSizeChan:                 make(chan int, 1),
   134  		eventProcessedCb:           cfg.EventProcessedCb,
   135  	}
   136  	err := catacomb.Invoke(catacomb.Plan{
   137  		Site: &task.catacomb,
   138  		Work: task.loop,
   139  		Init: workers,
   140  	})
   141  	if err != nil {
   142  		return nil, errors.Trace(err)
   143  	}
   144  	return task, nil
   145  }
   146  
   147  // The event types that the main loop passes to the eventProcessedCb callback
   148  // when it successfully processes the corresponding event.
   149  const (
   150  	eventTypeProcessedMachines         = "processed-machines"
   151  	eventTypeRetriedMachinesWithErrors = "retried-machines-with-errors"
   152  	eventTypeResizedWorkerPool         = "resized-worker-pool"
   153  	eventTypeHarvestModeChanged        = "harvest-mode-changed"
   154  )
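
        // A hypothetical illustration (not part of the original worker): a test can
        // observe the events above by registering a callback through
        // TaskConfig.EventProcessedCb, for example:
        //
        //	var (
        //		mu   sync.Mutex
        //		seen []string
        //	)
        //	cfg.EventProcessedCb = func(evt string) {
        //		mu.Lock()
        //		defer mu.Unlock()
        //		seen = append(seen, evt)
        //	}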
   155  
   156  type provisionerTask struct {
   157  	controllerUUID             string
   158  	hostTag                    names.Tag
   159  	logger                     Logger
   160  	taskAPI                    TaskAPI
   161  	distributionGroupFinder    DistributionGroupFinder
   162  	toolsFinder                ToolsFinder
   163  	machineChanges             watcher.StringsChannel
   164  	retryChanges               watcher.NotifyChannel
   165  	broker                     environs.InstanceBroker
   166  	catacomb                   catacomb.Catacomb
   167  	auth                       authentication.AuthenticationProvider
   168  	imageStream                string
   169  	harvestMode                config.HarvestMode
   170  	harvestModeChan            chan config.HarvestMode
   171  	retryStartInstanceStrategy RetryStrategy
   172  
   173  	machinesMutex            sync.RWMutex
   174  	machines                 map[string]apiprovisioner.MachineProvisioner // machine ID -> machine
   175  	machinesStarting         map[string]bool                              // machine IDs currently being started.
   176  	machinesStopping         map[string]bool                              // machine IDs currently being stopped.
   177  	machinesStopDeferred     map[string]bool                              // machine IDs which were set as dead while starting. They will be stopped once they are online.
   178  	availabilityZoneMachines []*AvailabilityZoneMachine
   179  	instances                map[instance.Id]instances.Instance // instanceID -> instance
   180  	cloudCallCtxFunc         common.CloudCallContextFunc
   181  
   182  	// A worker pool for starting/stopping instances in parallel.
   183  	wp         *workerpool.WorkerPool
   184  	wpSizeChan chan int
   185  
   186  	// eventProcessedCb is an optional, externally-registered callback that
   187  	// will be invoked when the task main loop successfully processes an event.
   188  	// The event type is provided as the first arg to the callback.
   189  	eventProcessedCb func(string)
   190  }
   191  
   192  // Kill implements worker.Worker.Kill.
   193  func (task *provisionerTask) Kill() {
   194  	task.catacomb.Kill(nil)
   195  }
   196  
   197  // Wait implements worker.Worker.Wait.
   198  func (task *provisionerTask) Wait() error {
   199  	return task.catacomb.Wait()
   200  }
   201  
   202  func (task *provisionerTask) loop() (taskErr error) {
   203  	task.logger.Infof("entering provisioner task loop; using provisioner pool with %d workers", task.wp.Size())
   204  	defer func() {
   205  		wpErr := task.wp.Close()
   206  		if taskErr == nil {
   207  			taskErr = wpErr
   208  		}
   209  		task.logger.Infof("exiting provisioner task loop; err: %v", taskErr)
   210  	}()
   211  
   212  	// Don't allow the harvesting mode to change until we have read at
   213  	// least one set of changes, which will populate the task.machines
   214  	// map. Otherwise we will potentially see all legitimate instances
   215  	// as unknown.
   216  	var harvestModeChan chan config.HarvestMode
   217  
   218  	// When the watcher is started, its initial event contains all of the
   219  	// machines that are relevant to this provisioner. Since that event is
   220  	// delivered straight away, we know changes will arrive right off the bat.
   221  	ctx := task.cloudCallCtxFunc(stdcontext.Background())
   222  	for {
   223  		select {
   224  		case ids, ok := <-task.machineChanges:
   225  			if !ok {
   226  				return errors.New("machine watcher closed channel")
   227  			}
   228  
   229  			if err := task.processMachines(ctx, ids); err != nil {
   230  				return errors.Annotate(err, "processing updated machines")
   231  			}
   232  
   233  			task.notifyEventProcessedCallback(eventTypeProcessedMachines)
   234  
   235  			// We've seen a set of changes.
   236  			// Enable modification of harvesting mode.
   237  			harvestModeChan = task.harvestModeChan
   238  		case numWorkers := <-task.wpSizeChan:
   239  			if task.wp.Size() == numWorkers {
   240  				continue // nothing to do
   241  			}
   242  
   243  			// Stop the current pool (checking for any pending
   244  			// errors) and create a new one.
   245  			task.logger.Infof("resizing provision worker pool size to %d", numWorkers)
   246  			if err := task.wp.Close(); err != nil {
   247  				return err
   248  			}
   249  			task.wp = workerpool.NewWorkerPool(task.logger, numWorkers)
   250  			task.notifyEventProcessedCallback(eventTypeResizedWorkerPool)
   251  		case harvestMode := <-harvestModeChan:
   252  			if harvestMode == task.harvestMode {
   253  				break
   254  			}
   255  			task.logger.Infof("harvesting mode changed to %s", harvestMode)
   256  			task.harvestMode = harvestMode
   257  			task.notifyEventProcessedCallback(eventTypeHarvestModeChanged)
   258  			if harvestMode.HarvestUnknown() {
   259  				task.logger.Infof("harvesting unknown machines")
   260  				if err := task.processMachines(ctx, nil); err != nil {
   261  					return errors.Annotate(err, "processing machines after safe mode disabled")
   262  				}
   263  				task.notifyEventProcessedCallback(eventTypeProcessedMachines)
   264  			}
   265  		case <-task.retryChanges:
   266  			if err := task.processMachinesWithTransientErrors(ctx); err != nil {
   267  				return errors.Annotate(err, "processing machines with transient errors")
   268  			}
   269  			task.notifyEventProcessedCallback(eventTypeRetriedMachinesWithErrors)
   270  		case <-task.wp.Done():
   271  			// The worker pool has detected one or more errors and
   272  			// is in the process of shutting down. Collect and
   273  			// report any emitted errors.
   274  			return task.wp.Close()
   275  		case <-task.catacomb.Dying():
   276  			return task.catacomb.ErrDying()
   277  		}
   278  	}
   279  }
   280  
   281  func (task *provisionerTask) notifyEventProcessedCallback(evtType string) {
   282  	if task.eventProcessedCb != nil {
   283  		task.eventProcessedCb(evtType)
   284  	}
   285  }
   286  
   287  // SetHarvestMode implements ProvisionerTask.SetHarvestMode().
   288  func (task *provisionerTask) SetHarvestMode(mode config.HarvestMode) {
   289  	select {
   290  	case task.harvestModeChan <- mode:
   291  	case <-task.catacomb.Dying():
   292  	}
   293  }
   294  
   295  // SetNumProvisionWorkers queues a pool resize request to be processed by the
   296  // provisioner task main loop.
   297  func (task *provisionerTask) SetNumProvisionWorkers(numWorkers int) {
   298  	select {
   299  	case task.wpSizeChan <- numWorkers:
   300  	case <-task.catacomb.Dying():
   301  	}
   302  }
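
        // exampleReconfigureTask is a hypothetical sketch (not part of the original
        // worker) showing how a caller holding a ProvisionerTask might apply runtime
        // configuration changes. Both setters hand the new value to the main loop
        // (or give up if the task is dying), so they are safe to call from a
        // goroutine watching model or controller config.
        func exampleReconfigureTask(t ProvisionerTask, mode config.HarvestMode, workers int) {
        	// Harvest-mode changes only take effect after the main loop has
        	// processed at least one set of machine changes.
        	t.SetHarvestMode(mode)
        	// The provision worker pool is drained and rebuilt by the main loop.
        	t.SetNumProvisionWorkers(workers)
        }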
   303  
   304  func (task *provisionerTask) processMachinesWithTransientErrors(ctx context.ProviderCallContext) error {
   305  	results, err := task.taskAPI.MachinesWithTransientErrors()
   306  	if err != nil || len(results) == 0 {
   307  		return nil
   308  	}
   309  	task.logger.Tracef("processMachinesWithTransientErrors(%v)", results)
   310  	var pending []apiprovisioner.MachineProvisioner
   311  	for _, result := range results {
   312  		if result.Status.Error != nil {
   313  			task.logger.Errorf("cannot retry provisioning of machine %q: %v", result.Machine.Id(), result.Status.Error)
   314  			continue
   315  		}
   316  		machine := result.Machine
   317  		if err := machine.SetStatus(status.Pending, "", nil); err != nil {
   318  			task.logger.Errorf("cannot reset status of machine %q: %v", machine.Id(), err)
   319  			continue
   320  		}
   321  		if err := machine.SetInstanceStatus(status.Provisioning, "", nil); err != nil {
   322  			task.logger.Errorf("cannot reset instance status of machine %q: %v", machine.Id(), err)
   323  			continue
   324  		}
   325  		if err := machine.SetModificationStatus(status.Idle, "", nil); err != nil {
   326  			task.logger.Errorf("cannot reset modification status of machine %q: %v", machine.Id(), err)
   327  			continue
   328  		}
   329  		task.machinesMutex.Lock()
   330  		task.machines[machine.Tag().String()] = machine
   331  		task.machinesMutex.Unlock()
   332  		pending = append(pending, machine)
   333  	}
   334  	return task.queueStartMachines(ctx, pending)
   335  }
   336  
   337  func (task *provisionerTask) processMachines(ctx context.ProviderCallContext, ids []string) error {
   338  	task.logger.Tracef("processMachines(%v)", ids)
   339  
   340  	// Populate the task's maps of current instances and machines.
   341  	if err := task.populateMachineMaps(ctx, ids); err != nil {
   342  		return errors.Trace(err)
   343  	}
   344  
   345  	// Maintain zone-machine distributions.
   346  	err := task.updateAvailabilityZoneMachines(ctx)
   347  	if err != nil && !errors.IsNotImplemented(err) {
   348  		return errors.Annotate(err, "updating AZ distributions")
   349  	}
   350  
   351  	// Find machines without an instance ID or that are dead.
   352  	pending, dead, err := task.pendingOrDead(ids)
   353  	if err != nil {
   354  		return errors.Trace(err)
   355  	}
   356  
   357  	// Queue removal of any dead machines that are not already being
   358  	// stopped or flagged for deferred stopping once they are online.
   359  	if err := task.filterAndQueueRemovalOfDeadMachines(ctx, dead); err != nil {
   360  		return errors.Trace(err)
   361  	}
   362  
   363  	// Queue start requests for any other pending instances.
   364  	return errors.Trace(task.queueStartMachines(ctx, pending))
   365  }
   366  
   367  func instanceIds(instances []instances.Instance) []string {
   368  	ids := make([]string, 0, len(instances))
   369  	for _, inst := range instances {
   370  		ids = append(ids, string(inst.Id()))
   371  	}
   372  	return ids
   373  }
   374  
   375  // populateMachineMaps updates task.instances. Also updates task.machines map
   376  // if a list of IDs is given.
   377  func (task *provisionerTask) populateMachineMaps(ctx context.ProviderCallContext, ids []string) error {
   378  	allInstances, err := task.broker.AllRunningInstances(ctx)
   379  	if err != nil {
   380  		return errors.Annotate(err, "getting all instances from broker")
   381  	}
   382  
   383  	instances := make(map[instance.Id]instances.Instance)
   384  	for _, i := range allInstances {
   385  		instances[i.Id()] = i
   386  	}
   387  	task.machinesMutex.Lock()
   388  	task.instances = instances
   389  	task.machinesMutex.Unlock()
   390  
   391  	// Update the machines map with new data for each of the machines in the
   392  	// change list.
   393  	machineTags := make([]names.MachineTag, len(ids))
   394  	for i, id := range ids {
   395  		machineTags[i] = names.NewMachineTag(id)
   396  	}
   397  	machines, err := task.taskAPI.Machines(machineTags...)
   398  	if err != nil {
   399  		return errors.Annotatef(err, "getting machines %v", ids)
   400  	}
   401  	task.machinesMutex.Lock()
   402  	defer task.machinesMutex.Unlock()
   403  	for i, result := range machines {
   404  		switch {
   405  		case result.Err == nil:
   406  			task.machines[result.Machine.Id()] = result.Machine
   407  		case params.IsCodeNotFoundOrCodeUnauthorized(result.Err):
   408  			task.logger.Debugf("machine %q not found in state", ids[i])
   409  			delete(task.machines, ids[i])
   410  		default:
   411  			return errors.Annotatef(result.Err, "getting machine %v", ids[i])
   412  		}
   413  	}
   414  	return nil
   415  }
   416  
   417  // pendingOrDead looks up machines with ids and returns those that do not
   418  // have an instance id assigned yet, and also those that are dead. Any machines
   419  // that are currently being stopped or have been marked for deferred stopping
   420  // once they are online will be skipped.
   421  func (task *provisionerTask) pendingOrDead(
   422  	ids []string,
   423  ) (pending, dead []apiprovisioner.MachineProvisioner, err error) {
   424  	task.machinesMutex.RLock()
   425  	defer task.machinesMutex.RUnlock()
   426  	for _, id := range ids {
   427  		// Ignore machines that have either been queued for deferred
   428  		// stopping or are currently being stopped.
   429  		if _, found := task.machinesStopDeferred[id]; found {
   430  			task.logger.Tracef("pendingOrDead: ignoring machine %q; machine has deferred stop flag set", id)
   431  			continue // ignore: will be stopped once started
   432  		} else if _, found := task.machinesStopping[id]; found {
   433  			task.logger.Tracef("pendingOrDead: ignoring machine %q; machine is currently being stopped", id)
   434  			continue // ignore: currently being stopped.
   435  		}
   436  
   437  		machine, found := task.machines[id]
   438  		if !found {
   439  			task.logger.Infof("machine %q not found", id)
   440  			continue
   441  		}
   442  		var classification MachineClassification
   443  		classification, err = classifyMachine(task.logger, machine)
   444  		if err != nil {
   445  			return // return the error
   446  		}
   447  		switch classification {
   448  		case Pending:
   449  			pending = append(pending, machine)
   450  		case Dead:
   451  			dead = append(dead, machine)
   452  		}
   453  	}
   454  	task.logger.Tracef("pending machines: %v", pending)
   455  	task.logger.Tracef("dead machines: %v", dead)
   456  	return
   457  }
   458  
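        // ClassifiableMachine describes the machine methods required to classify a
        // machine as pending provisioning, dead, or neither.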
   459  type ClassifiableMachine interface {
   460  	Life() life.Value
   461  	InstanceId() (instance.Id, error)
   462  	EnsureDead() error
   463  	Status() (status.Status, string, error)
   464  	InstanceStatus() (status.Status, string, error)
   465  	Id() string
   466  }
   467  
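        // MachineClassification is the provisioning classification of a machine.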
   468  type MachineClassification string
   469  
   470  const (
   471  	None    MachineClassification = "none"
   472  	Pending MachineClassification = "Pending"
   473  	Dead    MachineClassification = "Dead"
   474  )
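
        // For example (illustrative): a dying machine with no instance is ensured
        // dead and classified Dead; an alive machine whose machine status is Pending,
        // or whose instance status is Provisioning, is classified Pending; a machine
        // that already has an instance ID is classified None, as there is nothing
        // for the provisioner to do.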
   475  
   476  func classifyMachine(logger Logger, machine ClassifiableMachine) (
   477  	MachineClassification, error) {
   478  	switch machine.Life() {
   479  	case life.Dying:
   480  		if _, err := machine.InstanceId(); err == nil {
   481  			return None, nil
   482  		} else if !params.IsCodeNotProvisioned(err) {
   483  			return None, errors.Annotatef(err, "loading dying machine id:%s, details:%v", machine.Id(), machine)
   484  		}
   485  		logger.Infof("killing dying, unprovisioned machine %q", machine)
   486  		if err := machine.EnsureDead(); err != nil {
   487  			return None, errors.Annotatef(err, "ensuring machine dead id:%s, details:%v", machine.Id(), machine)
   488  		}
   489  		fallthrough
   490  	case life.Dead:
   491  		return Dead, nil
   492  	}
   493  	instId, err := machine.InstanceId()
   494  	if err != nil {
   495  		if !params.IsCodeNotProvisioned(err) {
   496  			return None, errors.Annotatef(err, "loading machine id:%s, details:%v", machine.Id(), machine)
   497  		}
   498  		machineStatus, _, err := machine.Status()
   499  		if err != nil {
   500  			logger.Infof("cannot get machine id:%s, details:%v, err:%v", machine.Id(), machine, err)
   501  			return None, nil
   502  		}
   503  		if machineStatus == status.Pending {
   504  			logger.Infof("found machine pending provisioning id:%s, details:%v", machine.Id(), machine)
   505  			return Pending, nil
   506  		}
   507  		instanceStatus, _, err := machine.InstanceStatus()
   508  		if err != nil {
   509  			logger.Infof("cannot read instance status id:%s, details:%v, err:%v", machine.Id(), machine, err)
   510  			return None, nil
   511  		}
   512  		if instanceStatus == status.Provisioning {
   513  			logger.Infof("found machine provisioning id:%s, details:%v", machine.Id(), machine)
   514  			return Pending, nil
   515  		}
   516  		return None, nil
   517  	}
   518  	logger.Infof("machine %s already started as instance %q", machine.Id(), instId)
   519  
   520  	return None, nil
   521  }
   522  
   523  // findUnknownInstances finds instances which are not associated with a machine.
   524  func (task *provisionerTask) findUnknownInstances(stopping []instances.Instance) ([]instances.Instance, error) {
   525  	// Make a copy of the instances we know about.
   526  	taskInstances := make(map[instance.Id]instances.Instance)
   527  	for k, v := range task.instances {
   528  		taskInstances[k] = v
   529  	}
   530  
   531  	task.machinesMutex.RLock()
   532  	defer task.machinesMutex.RUnlock()
   533  	for _, m := range task.machines {
   534  		instId, err := m.InstanceId()
   535  		switch {
   536  		case err == nil:
   537  			delete(taskInstances, instId)
   538  		case params.IsCodeNotProvisioned(err):
   539  		case params.IsCodeNotFoundOrCodeUnauthorized(err):
   540  		default:
   541  			return nil, err
   542  		}
   543  	}
   544  	// Now remove all the instances that we are already stopping, as we
   545  	// know about those and don't want to include them in the unknown list.
   546  	for _, inst := range stopping {
   547  		delete(taskInstances, inst.Id())
   548  	}
   549  	var unknown []instances.Instance
   550  	for _, inst := range taskInstances {
   551  		unknown = append(unknown, inst)
   552  	}
   553  	return unknown, nil
   554  }
   555  
   556  // filterAndQueueRemovalOfDeadMachines scans the list of dead machines and:
   557  //   - Sets the deferred stop flag for machines that are still online
   558  //   - Filters out any machines that are either stopping or have the deferred
   559  //     stop flag set.
   560  //   - Marks the remaining machines as stopping and queues a request for them to
   561  //     be cleaned up.
   562  func (task *provisionerTask) filterAndQueueRemovalOfDeadMachines(ctx context.ProviderCallContext, dead []apiprovisioner.MachineProvisioner) error {
   563  	// Flag any machines in the dead list that are still being started so
   564  	// they will be stopped once they come online.
   565  	task.deferStopForNotYetStartedMachines(dead)
   566  
   567  	// Filter the initial dead machine list. Any machines marked for
   568  	// deferred stopping, machines that are already being stopped and
   569  	// machines that have not yet finished provisioning will be removed
   570  	// from the filtered list.
   571  	dead = task.filterDeadMachines(dead)
   572  
   573  	// The remaining machines will be removed asynchronously and this
   574  	// method can be invoked again concurrently to process another machine
   575  	// change event. To avoid attempts to remove the same machines twice,
   576  	// they are flagged as stopping.
   577  	task.machinesMutex.Lock()
   578  	for _, machine := range dead {
   579  		machID := machine.Id()
   580  		if !task.machinesStopDeferred[machID] {
   581  			task.machinesStopping[machID] = true
   582  		}
   583  	}
   584  	task.machinesMutex.Unlock()
   585  	return task.queueRemovalOfDeadMachines(ctx, dead)
   586  }
   587  
   588  func (task *provisionerTask) queueRemovalOfDeadMachines(
   589  	ctx context.ProviderCallContext,
   590  	dead []apiprovisioner.MachineProvisioner,
   591  ) error {
   592  	// Collect the instances for all provisioned machines that are dead.
   593  	stopping := task.instancesForDeadMachines(dead)
   594  
   595  	// Find running instances that have no machines associated.
   596  	unknown, err := task.findUnknownInstances(stopping)
   597  	if err != nil {
   598  		return errors.Trace(err)
   599  	}
   600  
   601  	if !task.harvestMode.HarvestUnknown() && len(unknown) != 0 {
   602  		task.logger.Infof(
   603  			"%s is set to %s; unknown instances not stopped %v",
   604  			config.ProvisionerHarvestModeKey,
   605  			task.harvestMode.String(),
   606  			instanceIds(unknown),
   607  		)
   608  		unknown = nil
   609  	}
   610  
   611  	if (task.harvestMode.HarvestNone() || !task.harvestMode.HarvestDestroyed()) && len(stopping) != 0 {
   612  		task.logger.Infof(
   613  			`%s is set to "%s"; will not harvest %s`,
   614  			config.ProvisionerHarvestModeKey,
   615  			task.harvestMode.String(),
   616  			instanceIds(stopping),
   617  		)
   618  		stopping = nil
   619  	}
   620  
   621  	if len(dead) == 0 {
   622  		return nil // nothing to do
   623  	}
   624  
   625  	provTask := workerpool.Task{
   626  		Type: "stop-instances",
   627  		Process: func() error {
   628  			if len(stopping) > 0 {
   629  				task.logger.Infof("stopping known instances %v", instanceIds(stopping))
   630  			}
   631  			if len(unknown) > 0 {
   632  				task.logger.Infof("stopping unknown instances %v", instanceIds(unknown))
   633  			}
   634  
   635  			// It is important that we stop unknown instances before starting
   636  			// pending ones, because if we previously started an instance and
   637  			// then failed to set its InstanceId on the machine, we don't want
   638  			// to start a new instance for the same machine ID.
   639  			if err := task.doStopInstances(ctx, append(stopping, unknown...)); err != nil {
   640  				return errors.Trace(err)
   641  			}
   642  
   643  			// Remove any dead machines from state.
   644  			for _, machine := range dead {
   645  				task.logger.Infof("removing dead machine %q", machine.Id())
   646  				if err := machine.MarkForRemoval(); err != nil {
   647  					task.logger.Errorf("failed to remove dead machine %q", machine.Id())
   648  				}
   649  				task.removeMachineFromAZMap(machine)
   650  				machID := machine.Id()
   651  				task.machinesMutex.Lock()
   652  				delete(task.machines, machID)
   653  				delete(task.machinesStopping, machID)
   654  				task.machinesMutex.Unlock()
   655  			}
   656  
   657  			return nil
   658  		},
   659  	}
   660  
   661  	select {
   662  	case task.wp.Queue() <- provTask:
   663  		// successfully enqueued removal request
   664  		return nil
   665  	case <-task.catacomb.Dying():
   666  		return task.catacomb.ErrDying()
   667  	case <-task.wp.Done():
   668  		// Capture and surface asynchronous worker pool errors.
   669  		return task.wp.Close()
   670  	}
   671  }
   672  
   673  // Filter the provided dead machines and remove any machines marked for
   674  // deferred stopping, machines that are currently being stopped and any
   675  // machines that have not yet finished starting.
   676  func (task *provisionerTask) filterDeadMachines(dead []apiprovisioner.MachineProvisioner) []apiprovisioner.MachineProvisioner {
   677  	var deadMachines []apiprovisioner.MachineProvisioner
   678  
   679  	task.machinesMutex.Lock()
   680  	for _, machine := range dead {
   681  		machID := machine.Id()
   682  
   683  		// Ignore any machines for which we have deferred stopping,
   684  		// machines that are currently being stopped, or machines that
   685  		// are still being started.
   686  		if task.machinesStopDeferred[machID] || task.machinesStopping[machID] || task.machinesStarting[machID] {
   687  			continue
   688  		}
   689  
   690  		// This machine should be queued for deletion.
   691  		deadMachines = append(deadMachines, machine)
   692  	}
   693  	task.machinesMutex.Unlock()
   694  
   695  	return deadMachines
   696  }
   697  
   698  // Iterate the list of dead machines and flag the ones that are still being
   699  // started so they can be immediately stopped once they come online.
   700  func (task *provisionerTask) deferStopForNotYetStartedMachines(dead []apiprovisioner.MachineProvisioner) {
   701  	task.machinesMutex.Lock()
   702  	for _, machine := range dead {
   703  		machID := machine.Id()
   704  		if task.machinesStarting[machID] {
   705  			task.machinesStopDeferred[machID] = true
   706  		}
   707  	}
   708  	task.machinesMutex.Unlock()
   709  }
   710  
   711  // instancesForDeadMachines returns a list of instances that correspond to
   712  // machines with a life of "dead" in state. Missing machines and machines that
   713  // have not finished starting are omitted from the list.
   714  func (task *provisionerTask) instancesForDeadMachines(dead []apiprovisioner.MachineProvisioner) []instances.Instance {
   715  	var deadInstances []instances.Instance
   716  	for _, machine := range dead {
   717  		// Ignore machines that are still provisioning
   718  		task.machinesMutex.RLock()
   719  		if task.machinesStarting[machine.Id()] {
   720  			task.machinesMutex.RUnlock()
   721  			continue
   722  		}
   723  		task.machinesMutex.RUnlock()
   724  
   725  		instId, err := machine.InstanceId()
   726  		if err == nil {
   727  			keep, _ := machine.KeepInstance()
   728  			if keep {
   729  				task.logger.Debugf("machine %v is dead but keep-instance is true", instId)
   730  				continue
   731  			}
   732  
   733  			// If the instance is not found we can't stop it.
   734  			if inst, found := task.instances[instId]; found {
   735  				deadInstances = append(deadInstances, inst)
   736  			}
   737  		}
   738  	}
   739  	return deadInstances
   740  }
   741  
   742  func (task *provisionerTask) doStopInstances(ctx context.ProviderCallContext, instances []instances.Instance) error {
   743  	// Although calling StopInstances with an empty slice should produce no
   744  	// change in the provider, environs like dummy do not consider this a no-op.
   745  	if len(instances) == 0 {
   746  		return nil
   747  	}
   748  	if wrench.IsActive("provisioner", "stop-instances") {
   749  		return errors.New("wrench in the works")
   750  	}
   751  
   752  	ids := make([]instance.Id, len(instances))
   753  	for i, inst := range instances {
   754  		ids[i] = inst.Id()
   755  	}
   756  	if err := task.broker.StopInstances(ctx, ids...); err != nil {
   757  		return errors.Annotate(err, "stopping instances")
   758  	}
   759  	return nil
   760  }
   761  
   762  func (task *provisionerTask) constructInstanceConfig(
   763  	machine apiprovisioner.MachineProvisioner,
   764  	auth authentication.AuthenticationProvider,
   765  	pInfo *params.ProvisioningInfo,
   766  ) (*instancecfg.InstanceConfig, error) {
   767  
   768  	apiInfo, err := auth.SetupAuthentication(machine)
   769  	if err != nil {
   770  		return nil, errors.Annotate(err, "setting up authentication")
   771  	}
   772  
   773  	// Generate a nonce for the new instance, with the format: "machine-#:UUID".
   774  	// The first part is a badge, specifying the tag of the machine the provisioner
   775  	// is running on, while the second part is a random UUID.
   776  	uuid, err := utils.NewUUID()
   777  	if err != nil {
   778  		return nil, errors.Annotate(err, "generating nonce for machine "+machine.Id())
   779  	}
   780  
   781  	nonce := fmt.Sprintf("%s:%s", task.hostTag, uuid)
   782  	base, err := corebase.ParseBase(pInfo.Base.Name, pInfo.Base.Channel)
   783  	if err != nil {
   784  		return nil, errors.Annotatef(err, "parsing machine base %q", pInfo.Base)
   785  	}
   786  	instanceConfig, err := instancecfg.NewInstanceConfig(
   787  		names.NewControllerTag(controller.Config(pInfo.ControllerConfig).ControllerUUID()),
   788  		machine.Id(),
   789  		nonce,
   790  		task.imageStream,
   791  		base,
   792  		apiInfo,
   793  	)
   794  	if err != nil {
   795  		return nil, errors.Trace(err)
   796  	}
   797  
   798  	instanceConfig.ControllerConfig = make(map[string]interface{})
   799  	for k, v := range pInfo.ControllerConfig {
   800  		instanceConfig.ControllerConfig[k] = v
   801  	}
   802  
   803  	instanceConfig.Tags = pInfo.Tags
   804  	if len(pInfo.Jobs) > 0 {
   805  		instanceConfig.Jobs = pInfo.Jobs
   806  	}
   807  
   808  	if instanceConfig.IsController() {
   809  		publicKey, err := simplestreams.UserPublicSigningKey()
   810  		if err != nil {
   811  			return nil, errors.Trace(err)
   812  		}
   813  		instanceConfig.PublicImageSigningKey = publicKey
   814  	}
   815  
   816  	instanceConfig.CloudInitUserData = pInfo.CloudInitUserData
   817  
   818  	return instanceConfig, nil
   819  }
   820  
   821  func (task *provisionerTask) constructStartInstanceParams(
   822  	controllerUUID string,
   823  	machine apiprovisioner.MachineProvisioner,
   824  	instanceConfig *instancecfg.InstanceConfig,
   825  	provisioningInfo *params.ProvisioningInfo,
   826  	possibleTools coretools.List,
   827  ) (environs.StartInstanceParams, error) {
   828  
   829  	volumes := make([]storage.VolumeParams, len(provisioningInfo.Volumes))
   830  	for i, v := range provisioningInfo.Volumes {
   831  		volumeTag, err := names.ParseVolumeTag(v.VolumeTag)
   832  		if err != nil {
   833  			return environs.StartInstanceParams{}, errors.Trace(err)
   834  		}
   835  		if v.Attachment == nil {
   836  			return environs.StartInstanceParams{}, errors.Errorf("volume params missing attachment")
   837  		}
   838  		machineTag, err := names.ParseMachineTag(v.Attachment.MachineTag)
   839  		if err != nil {
   840  			return environs.StartInstanceParams{}, errors.Trace(err)
   841  		}
   842  		if machineTag != machine.Tag() {
   843  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params has invalid machine tag")
   844  		}
   845  		if v.Attachment.InstanceId != "" {
   846  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params specifies instance ID")
   847  		}
   848  		volumes[i] = storage.VolumeParams{
   849  			Tag:          volumeTag,
   850  			Size:         v.Size,
   851  			Provider:     storage.ProviderType(v.Provider),
   852  			Attributes:   v.Attributes,
   853  			ResourceTags: v.Tags,
   854  			Attachment: &storage.VolumeAttachmentParams{
   855  				AttachmentParams: storage.AttachmentParams{
   856  					Machine:  machineTag,
   857  					ReadOnly: v.Attachment.ReadOnly,
   858  				},
   859  				Volume: volumeTag,
   860  			},
   861  		}
   862  	}
   863  	volumeAttachments := make([]storage.VolumeAttachmentParams, len(provisioningInfo.VolumeAttachments))
   864  	for i, v := range provisioningInfo.VolumeAttachments {
   865  		volumeTag, err := names.ParseVolumeTag(v.VolumeTag)
   866  		if err != nil {
   867  			return environs.StartInstanceParams{}, errors.Trace(err)
   868  		}
   869  		machineTag, err := names.ParseMachineTag(v.MachineTag)
   870  		if err != nil {
   871  			return environs.StartInstanceParams{}, errors.Trace(err)
   872  		}
   873  		if machineTag != machine.Tag() {
   874  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params has invalid machine tag")
   875  		}
   876  		if v.InstanceId != "" {
   877  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params specifies instance ID")
   878  		}
   879  		if v.VolumeId == "" {
   880  			return environs.StartInstanceParams{}, errors.Errorf("volume attachment params does not specify volume ID")
   881  		}
   882  		volumeAttachments[i] = storage.VolumeAttachmentParams{
   883  			AttachmentParams: storage.AttachmentParams{
   884  				Provider: storage.ProviderType(v.Provider),
   885  				Machine:  machineTag,
   886  				ReadOnly: v.ReadOnly,
   887  			},
   888  			Volume:   volumeTag,
   889  			VolumeId: v.VolumeId,
   890  		}
   891  	}
   892  
   893  	var endpointBindings map[string]network.Id
   894  	if len(provisioningInfo.EndpointBindings) != 0 {
   895  		endpointBindings = make(map[string]network.Id)
   896  		for endpoint, space := range provisioningInfo.EndpointBindings {
   897  			endpointBindings[endpoint] = network.Id(space)
   898  		}
   899  	}
   900  
   901  	possibleImageMetadata := make([]*imagemetadata.ImageMetadata, len(provisioningInfo.ImageMetadata))
   902  	for i, metadata := range provisioningInfo.ImageMetadata {
   903  		possibleImageMetadata[i] = &imagemetadata.ImageMetadata{
   904  			Id:          metadata.ImageId,
   905  			Arch:        metadata.Arch,
   906  			RegionAlias: metadata.Region,
   907  			RegionName:  metadata.Region,
   908  			Storage:     metadata.RootStorageType,
   909  			Stream:      metadata.Stream,
   910  			VirtType:    metadata.VirtType,
   911  			Version:     metadata.Version,
   912  		}
   913  	}
   914  
   915  	startInstanceParams := environs.StartInstanceParams{
   916  		ControllerUUID:    controllerUUID,
   917  		Constraints:       provisioningInfo.Constraints,
   918  		Tools:             possibleTools,
   919  		InstanceConfig:    instanceConfig,
   920  		Placement:         provisioningInfo.Placement,
   921  		Volumes:           volumes,
   922  		VolumeAttachments: volumeAttachments,
   923  		SubnetsToZones:    subnetZonesFromNetworkTopology(provisioningInfo.ProvisioningNetworkTopology),
   924  		EndpointBindings:  endpointBindings,
   925  		ImageMetadata:     possibleImageMetadata,
   926  		StatusCallback:    machine.SetInstanceStatus,
   927  		Abort:             task.catacomb.Dying(),
   928  		CharmLXDProfiles:  provisioningInfo.CharmLXDProfiles,
   929  	}
   930  	if provisioningInfo.RootDisk != nil {
   931  		startInstanceParams.RootDisk = &storage.VolumeParams{
   932  			Provider:   storage.ProviderType(provisioningInfo.RootDisk.Provider),
   933  			Attributes: provisioningInfo.RootDisk.Attributes,
   934  		}
   935  	}
   936  
   937  	return startInstanceParams, nil
   938  }
   939  
   940  // AvailabilityZoneMachine keeps track of a single zone and which machines
   941  // are in it, which machines have failed to use it and which machines
   942  // shouldn't use it. This data is used to decide how to distribute
   943  // machines across availability zones.
   944  //
   945  // Exposed for testing.
   946  type AvailabilityZoneMachine struct {
   947  	ZoneName           string
   948  	MachineIds         set.Strings
   949  	FailedMachineIds   set.Strings
   950  	ExcludedMachineIds set.Strings // Don't use these machines in the zone.
   951  }
   952  
   953  // MatchesConstraints reports whether this AZ satisfies the given constraints.
   954  // If the constraints specify Zones, this AZ must match a listed ZoneName.
   955  func (az *AvailabilityZoneMachine) MatchesConstraints(cons constraints.Value) bool {
   956  	if !cons.HasZones() {
   957  		return true
   958  	}
   959  	for _, zone := range *cons.Zones {
   960  		if az.ZoneName == zone {
   961  			return true
   962  		}
   963  	}
   964  	return false
   965  }
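
        // For example (illustrative): an AvailabilityZoneMachine for zone
        // "us-east-1a" matches a constraints value whose Zones list includes
        // "us-east-1a", and any constraints value without Zones, but not one that
        // lists only other zone names.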
   966  
   967  // updateAvailabilityZoneMachines maintains a mapping of AZs to machines
   968  // running in each zone.
   969  // If the provider does not implement the ZonedEnviron interface, return nil.
   970  func (task *provisionerTask) updateAvailabilityZoneMachines(ctx context.ProviderCallContext) error {
   971  	zonedEnv, ok := task.broker.(providercommon.ZonedEnviron)
   972  	if !ok {
   973  		return nil
   974  	}
   975  
   976  	task.machinesMutex.Lock()
   977  	defer task.machinesMutex.Unlock()
   978  
   979  	// Only populate from the provider if we have no data.
   980  	// Otherwise, just check that we know all the current AZs.
   981  	if len(task.availabilityZoneMachines) == 0 {
   982  		if err := task.populateAvailabilityZoneMachines(ctx, zonedEnv); err != nil {
   983  			return errors.Trace(err)
   984  		}
   985  	} else {
   986  		if err := task.checkProviderAvailabilityZones(ctx, zonedEnv); err != nil {
   987  			return errors.Trace(err)
   988  		}
   989  	}
   990  
   991  	zones := make([]string, len(task.availabilityZoneMachines))
   992  	for i, azm := range task.availabilityZoneMachines {
   993  		zones[i] = azm.ZoneName
   994  	}
   995  	task.logger.Infof("provisioning in zones: %v", zones)
   996  
   997  	return nil
   998  }
   999  
  1000  // populateAvailabilityZoneMachines populates the slice,
  1001  // availabilityZoneMachines, with each zone and the IDs of
  1002  // machines running in that zone, according to the provider.
  1003  func (task *provisionerTask) populateAvailabilityZoneMachines(
  1004  	ctx context.ProviderCallContext, zonedEnv providercommon.ZonedEnviron,
  1005  ) error {
  1006  	availabilityZoneInstances, err := providercommon.AvailabilityZoneAllocations(zonedEnv, ctx, []instance.Id{})
  1007  	if err != nil {
  1008  		return errors.Trace(err)
  1009  	}
  1010  
  1011  	instanceMachines := make(map[instance.Id]string)
  1012  	for _, machine := range task.machines {
  1013  		instId, err := machine.InstanceId()
  1014  		if err != nil {
  1015  			continue
  1016  		}
  1017  		instanceMachines[instId] = machine.Id()
  1018  	}
  1019  
  1020  	// Translate instance IDs to machine IDs to aid distributing
  1021  	// to-be-created instances across availability zones.
  1022  	task.availabilityZoneMachines = make([]*AvailabilityZoneMachine, len(availabilityZoneInstances))
  1023  	for i, azInstances := range availabilityZoneInstances {
  1024  		machineIds := set.NewStrings()
  1025  		for _, instanceId := range azInstances.Instances {
  1026  			if id, ok := instanceMachines[instanceId]; ok {
  1027  				machineIds.Add(id)
  1028  			}
  1029  		}
  1030  		task.availabilityZoneMachines[i] = &AvailabilityZoneMachine{
  1031  			ZoneName:           azInstances.ZoneName,
  1032  			MachineIds:         machineIds,
  1033  			FailedMachineIds:   set.NewStrings(),
  1034  			ExcludedMachineIds: set.NewStrings(),
  1035  		}
  1036  	}
  1037  	return nil
  1038  }
  1039  
  1040  // checkProviderAvailabilityZones queries the known AZs.
  1041  // If any are missing from the AZ-machines slice, add them.
  1042  // If we have entries that are not known by the provider to be available zones,
  1043  // check whether we have machines there.
  1044  // If so, log a warning, otherwise we can delete them safely.
  1045  func (task *provisionerTask) checkProviderAvailabilityZones(
  1046  	ctx context.ProviderCallContext, zonedEnv providercommon.ZonedEnviron,
  1047  ) error {
  1048  	azs, err := zonedEnv.AvailabilityZones(ctx)
  1049  	if err != nil {
  1050  		return errors.Trace(err)
  1051  	}
  1052  
  1053  	zones := set.NewStrings()
  1054  	for _, z := range azs {
  1055  		if z.Available() {
  1056  			zones.Add(z.Name())
  1057  		}
  1058  	}
  1059  
  1060  	// Process all the zones that the provisioner knows about.
  1061  	newAZMs := task.availabilityZoneMachines[:0]
  1062  	for _, azm := range task.availabilityZoneMachines {
  1063  		// Provider has the zone as available, and we know it. All good.
  1064  		if zones.Contains(azm.ZoneName) {
  1065  			newAZMs = append(newAZMs, azm)
  1066  			zones.Remove(azm.ZoneName)
  1067  			continue
  1068  		}
  1069  
  1070  		// If the zone isn't available, but we think we have machines there,
  1071  		// play it safe and retain the entry.
  1072  		if len(azm.MachineIds) > 0 {
  1073  			task.logger.Warningf("machines %v are in zone %q, which is not available, or not known by the cloud",
  1074  				azm.MachineIds.Values(), azm.ZoneName)
  1075  			newAZMs = append(newAZMs, azm)
  1076  		}
  1077  
  1078  		// Fallthrough is for the zone's entry to be dropped.
  1079  		// We don't retain it for newAZMs.
  1080  		// The new list is logged by the caller.
  1081  	}
  1082  	task.availabilityZoneMachines = newAZMs
  1083  
  1084  	// Add any remaining zones to the list.
  1085  	// Since this method is only called if we have previously populated the
  1086  	// zone-machines slice, we can't have provisioned machines in the zone yet.
  1087  	for _, z := range zones.Values() {
  1088  		task.availabilityZoneMachines = append(task.availabilityZoneMachines, &AvailabilityZoneMachine{
  1089  			ZoneName:           z,
  1090  			MachineIds:         set.NewStrings(),
  1091  			FailedMachineIds:   set.NewStrings(),
  1092  			ExcludedMachineIds: set.NewStrings(),
  1093  		})
  1094  	}
  1095  	return nil
  1096  }
  1097  
  1098  // populateDistributionGroupZoneMap returns a zone mapping which only includes
  1099  // machines in the same distribution group.  This is used to determine where new
  1100  // machines in that distribution group should be placed.
  1101  func (task *provisionerTask) populateDistributionGroupZoneMap(machineIds []string) []*AvailabilityZoneMachine {
  1102  	var dgAvailabilityZoneMachines []*AvailabilityZoneMachine
  1103  	dgSet := set.NewStrings(machineIds...)
  1104  	for _, azm := range task.availabilityZoneMachines {
  1105  		dgAvailabilityZoneMachines = append(dgAvailabilityZoneMachines, &AvailabilityZoneMachine{
  1106  			azm.ZoneName,
  1107  			azm.MachineIds.Intersection(dgSet),
  1108  			azm.FailedMachineIds,
  1109  			azm.ExcludedMachineIds,
  1110  		})
  1111  	}
  1112  	return dgAvailabilityZoneMachines
  1113  }
  1114  
  1115  // machineAvailabilityZoneDistribution returns a suggested availability zone
  1116  // for the specified machine to start in.
  1117  // If the current provider does not implement availability zones, "" and no
  1118  // error will be returned.
  1119  // Machines are spread across availability zones based on lowest population of
  1120  // the "available" zones, and any supplied zone constraints.
  1121  // Machines in the same DistributionGroup are placed in different zones,
  1122  // distributed based on lowest population of machines in that DistributionGroup.
  1123  // Machines are not placed in a zone they are excluded from.
  1124  // If availability zones are implemented and no suitable one is found, a NotFound error is returned.
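        //
        // For example (illustrative): with zone populations az1=2, az2=1 and az3=1
        // and no zone constraints, the machine is assigned to az2 or az3 at random;
        // if it has previously failed in az2, az3 is chosen.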
  1125  func (task *provisionerTask) machineAvailabilityZoneDistribution(
  1126  	machineId string, distGroupMachineIds []string, cons constraints.Value,
  1127  ) (string, error) {
  1128  	task.machinesMutex.Lock()
  1129  	defer task.machinesMutex.Unlock()
  1130  
  1131  	if len(task.availabilityZoneMachines) == 0 {
  1132  		return "", nil
  1133  	}
  1134  
  1135  	// Assign an initial zone to a machine based on lowest population,
  1136  	// accommodating any supplied zone constraints.
  1137  	// If the machine has a distribution group, assign based on lowest zone
  1138  	// population of the distribution group's machines.
  1139  	// If more than one zone has the same number of machines, pick one of those at random.
  1140  	zoneMachines := task.availabilityZoneMachines
  1141  	if len(distGroupMachineIds) > 0 {
  1142  		zoneMachines = task.populateDistributionGroupZoneMap(distGroupMachineIds)
  1143  	}
  1144  
  1145  	// Make a map of zone machines keyed on count.
  1146  	zoneMap := make(map[int][]*AvailabilityZoneMachine)
  1147  	for _, zm := range zoneMachines {
  1148  		machineCount := zm.MachineIds.Size()
  1149  		zoneMap[machineCount] = append(zoneMap[machineCount], zm)
  1150  	}
  1151  	// Sort the counts we have by size so
  1152  	// we can process starting with the lowest.
  1153  	var zoneCounts []int
  1154  	for k := range zoneMap {
  1155  		zoneCounts = append(zoneCounts, k)
  1156  	}
  1157  	sort.Ints(zoneCounts)
  1158  
  1159  	var machineZone string
  1160  done:
  1161  	// Starting with the lowest count first, find a suitable AZ.
  1162  	for _, count := range zoneCounts {
  1163  		zmList := zoneMap[count]
  1164  		for len(zmList) > 0 {
  1165  			// Pick a random AZ to try.
  1166  			index := rand.Intn(len(zmList))
  1167  			zoneMachines := zmList[index]
  1168  			if !zoneMachines.MatchesConstraints(cons) {
  1169  				task.logger.Debugf("machine %s does not match az %s: constraints do not match",
  1170  					machineId, zoneMachines.ZoneName)
  1171  			} else if zoneMachines.FailedMachineIds.Contains(machineId) {
  1172  				task.logger.Debugf("machine %s does not match az %s: excluded in failed machine ids",
  1173  					machineId, zoneMachines.ZoneName)
  1174  			} else if zoneMachines.ExcludedMachineIds.Contains(machineId) {
  1175  				task.logger.Debugf("machine %s does not match az %s: excluded machine id",
  1176  					machineId, zoneMachines.ZoneName)
  1177  			} else {
  1178  				// Success, we're out of here.
  1179  				machineZone = zoneMachines.ZoneName
  1180  				break done
  1181  			}
  1182  			// Zone not suitable so remove it from the list and try the next one.
  1183  			zmList = append(zmList[:index], zmList[index+1:]...)
  1184  		}
  1185  	}
  1186  
  1187  	if machineZone == "" {
  1188  		return machineZone, errors.NotFoundf("suitable availability zone for machine %v", machineId)
  1189  	}
  1190  
  1191  	for _, zoneMachines := range task.availabilityZoneMachines {
  1192  		if zoneMachines.ZoneName == machineZone {
  1193  			zoneMachines.MachineIds.Add(machineId)
  1194  			break
  1195  		}
  1196  	}
  1197  	return machineZone, nil
  1198  }
  1199  
  1200  // queueStartMachines resolves the distribution groups for the provided
  1201  // machines and enqueues a request for starting each one. If the distribution
  1202  // group resolution fails for a particular machine, an error status is set on
  1203  // that machine and it is skipped; an error is returned only if setting that
  1204  // status fails. Any provisioning-related errors are reported asynchronously
  1205  // by the worker pool.
  1206  func (task *provisionerTask) queueStartMachines(ctx context.ProviderCallContext, machines []apiprovisioner.MachineProvisioner) error {
  1207  	if len(machines) == 0 {
  1208  		return nil
  1209  	}
  1210  
  1211  	// Get the distributionGroups for each machine now to avoid
  1212  	// successive calls to DistributionGroupByMachineId which will
  1213  	// return the same data.
  1214  	machineTags := make([]names.MachineTag, len(machines))
  1215  	for i, machine := range machines {
  1216  		machineTags[i] = machine.MachineTag()
  1217  	}
  1218  	machineDistributionGroups, err := task.distributionGroupFinder.DistributionGroupByMachineId(machineTags...)
  1219  	if err != nil {
  1220  		return errors.Trace(err)
  1221  	}
  1222  
  1223  	// Get all the provisioning info at once, so that we don't make many
  1224  	// singular requests in parallel to an API that supports batching.
  1225  	// Key the results by machine ID for retrieval in the loop below.
  1226  	// We rely here on the API guarantee that the returned results are
  1227  	// ordered to correspond to the call arguments.
  1228  	pInfoResults, err := task.taskAPI.ProvisioningInfo(machineTags)
  1229  	if err != nil {
  1230  		return errors.Trace(err)
  1231  	}
  1232  	pInfoMap := make(map[string]params.ProvisioningInfoResult, len(pInfoResults.Results))
  1233  	for i, tag := range machineTags {
  1234  		pInfoMap[tag.Id()] = pInfoResults.Results[i]
  1235  	}
  1236  
  1237  	for i, m := range machines {
  1238  		if machineDistributionGroups[i].Err != nil {
  1239  			if err := task.setErrorStatus("fetching distribution groups for machine %q: %v", m, machineDistributionGroups[i].Err); err != nil {
  1240  				return errors.Trace(err)
  1241  			}
  1242  			continue
  1243  		}
  1244  
  1245  		// Create and enqueue start instance request.  Keep track of
  1246  		// the pending request so that if a deletion request comes in
  1247  		// before the machine has completed provisioning we can defer
  1248  		// it until it does.
  1249  		task.machinesMutex.Lock()
  1250  		task.machinesStarting[m.Id()] = true
  1251  		task.machinesMutex.Unlock()
  1252  
  1253  		// Reassign the loop variable to prevent
  1254  		// overwriting the dispatched references.
  1255  		machine := m
  1256  		distGroup := machineDistributionGroups[i].MachineIds
  1257  
  1258  		provTask := workerpool.Task{
  1259  			Type: fmt.Sprintf("start-instance %s", machine.Id()),
  1260  			Process: func() error {
  1261  				machID := machine.Id()
  1262  
  1263  				if provisionErr := task.doStartMachine(ctx, machine, distGroup, pInfoMap[machID]); provisionErr != nil {
  1264  					return provisionErr
  1265  				}
  1266  
  1267  				task.machinesMutex.Lock()
  1268  				delete(task.machinesStarting, machID)
  1269  				// If the provisioning succeeded but a deletion
  1270  				// request has been deferred, queue it now.
  1271  				stopDeferred := task.machinesStopDeferred[machID]
  1272  				if stopDeferred {
  1273  					delete(task.machinesStopDeferred, machID)
  1274  					task.machinesStopping[machID] = true
  1275  				}
  1276  				task.machinesMutex.Unlock()
  1277  
  1278  				if stopDeferred {
  1279  					task.logger.Debugf("triggering deferred stop of machine %q", machID)
  1280  					return task.queueRemovalOfDeadMachines(ctx, []apiprovisioner.MachineProvisioner{
  1281  						machine,
  1282  					})
  1283  				}
  1284  
  1285  				return nil
  1286  			},
  1287  		}
  1288  
  1289  		select {
  1290  		case task.wp.Queue() <- provTask:
  1291  			// successfully enqueued provision request
  1292  		case <-task.catacomb.Dying():
  1293  			return task.catacomb.ErrDying()
  1294  		case <-task.wp.Done():
  1295  			// Capture and surface asynchronous worker pool errors.
  1296  			return task.wp.Close()
  1297  		}
  1298  	}
  1299  
  1300  	return nil
  1301  }
  1302  
  1303  func (task *provisionerTask) setErrorStatus(msg string, machine apiprovisioner.MachineProvisioner, err error) error {
  1304  	task.logger.Errorf(msg, machine, err)
  1305  	errForStatus := errors.Cause(err)
  1306  	if err2 := machine.SetInstanceStatus(status.ProvisioningError, errForStatus.Error(), nil); err2 != nil {
  1307  		// Something is wrong with this machine, better report it back.
  1308  		return errors.Annotatef(err2, "setting error status for machine %q", machine)
  1309  	}
  1310  	return nil
  1311  }
  1312  
  1313  func (task *provisionerTask) doStartMachine(
  1314  	ctx context.ProviderCallContext,
  1315  	machine apiprovisioner.MachineProvisioner,
  1316  	distributionGroupMachineIds []string,
  1317  	pInfoResult params.ProvisioningInfoResult,
  1318  ) (startErr error) {
  1319  	defer func() {
  1320  		if startErr == nil {
  1321  			return
  1322  		}
  1323  
  1324  		// Mask the error if the machine has the deferred stop flag set.
  1325  		// A stop request will be triggered immediately once this
  1326  		// method returns.
  1327  		task.machinesMutex.RLock()
  1328  		defer task.machinesMutex.RUnlock()
  1329  		machID := machine.Id()
  1330  		if task.machinesStopDeferred[machID] {
  1331  			task.logger.Tracef("doStartMachine: ignoring doStartMachine error (%v) for machine %q; machine has been marked dead while it was being started and has the deferred stop flag set", startErr, machID)
  1332  			startErr = nil
  1333  		}
  1334  	}()
  1335  
  1336  	if err := machine.SetInstanceStatus(status.Provisioning, "starting", nil); err != nil {
  1337  		task.logger.Errorf("%v", err)
  1338  	}
  1339  
  1340  	v, err := machine.ModelAgentVersion()
  1341  	if err != nil {
  1342  		return errors.Trace(err)
  1343  	}
  1344  
  1345  	startInstanceParams, err := task.setupToStartMachine(machine, v, pInfoResult)
  1346  	if err != nil {
  1347  		return errors.Trace(task.setErrorStatus("%v %v", machine, err))
  1348  	}
  1349  
  1350  	// Figure out whether placement restricts the zones available for this
  1351  	// instance, and if so, exclude this machine from being started in any
  1352  	// zone outside that set.
  1353  	if err := task.populateExcludedMachines(ctx, machine.Id(), startInstanceParams); err != nil {
  1354  		return errors.Trace(err)
  1355  	}
  1356  
  1357  	// TODO ProvisionerParallelization 2017-10-03
  1358  	// Improve the retry loop with a newer methodology.
  1359  	// Is rate limiting handled correctly?
  1360  	var result *environs.StartInstanceResult
  1361  
  1362  	// Attempt to create the instance, with up to "retryCount" retries after
  1363  	// the first attempt. If the provider supports availability zones and
  1364  	// we're automatically distributing across the zones, we try each
  1365  	// candidate zone within an attempt, or until a StartInstance call returns
  1366  	// an error satisfying errors.Is(err, environs.ErrAvailabilityZoneIndependent).
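        	// For example (illustrative only): with retryCount = 3 and three candidate
        	// zones, a zone-specific failure moves on to the next untried zone without
        	// consuming an attempt; attemptsLeft is only decremented once every candidate
        	// zone has failed or the error is zone-independent. The retry delay is
        	// applied after each failed StartInstance call.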
  1367  	for attemptsLeft := task.retryStartInstanceStrategy.retryCount; attemptsLeft >= 0; {
  1368  		if startInstanceParams.AvailabilityZone, err = task.machineAvailabilityZoneDistribution(
  1369  			machine.Id(), distributionGroupMachineIds, startInstanceParams.Constraints,
  1370  		); err != nil {
  1371  			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
  1372  		}
  1373  		if startInstanceParams.AvailabilityZone != "" {
  1374  			task.logger.Infof("trying machine %s StartInstance in availability zone %s",
  1375  				machine, startInstanceParams.AvailabilityZone)
  1376  		}
  1377  
  1378  		attemptResult, err := task.broker.StartInstance(ctx, startInstanceParams)
  1379  		if err == nil {
  1380  			result = attemptResult
  1381  			break
  1382  		} else if attemptsLeft <= 0 {
  1383  			// Set the state to error, so the machine will be skipped
  1384  			// next time until the error is resolved.
  1385  			task.removeMachineFromAZMap(machine)
  1386  			return task.setErrorStatus("cannot start instance for machine %q: %v", machine, err)
  1387  		} else {
  1388  			if startInstanceParams.AvailabilityZone != "" {
  1389  				task.logger.Warningf("machine %s failed to start in availability zone %s: %v",
  1390  					machine, startInstanceParams.AvailabilityZone, err)
  1391  			} else {
  1392  				task.logger.Warningf("machine %s failed to start: %v", machine, err)
  1393  			}
  1394  		}
  1395  
  1396  		retrying := true
  1397  		retryMsg := ""
  1398  		if startInstanceParams.AvailabilityZone != "" && !errors.Is(err, environs.ErrAvailabilityZoneIndependent) {
  1399  			// We've specified a zone, and the error may be specific to
  1400  			// that zone. Retry in another zone if there are any untried.
  1401  			azRemaining, err2 := task.markMachineFailedInAZ(machine,
  1402  				startInstanceParams.AvailabilityZone, startInstanceParams.Constraints)
  1403  			if err2 != nil {
  1404  				if err = task.setErrorStatus("cannot start instance for machine %q: %v", machine, err2); err != nil {
  1405  					task.logger.Errorf("setting error status: %s", err)
  1406  				}
  1407  				return err2
  1408  			}
  1409  			if azRemaining {
  1410  				retryMsg = fmt.Sprintf(
  1411  					"failed to start machine %s in zone %q, retrying in %v with new availability zone: %s",
  1412  					machine, startInstanceParams.AvailabilityZone,
  1413  					task.retryStartInstanceStrategy.retryDelay, err,
  1414  				)
  1415  				task.logger.Debugf("%s", retryMsg)
  1416  				// There are still more zones to try, so don't decrement "attemptsLeft" yet.
  1417  				retrying = false
  1418  			} else {
  1419  				// All availability zones have been attempted for this
  1420  				// iteration; clear the failures for the next time around.
  1421  				// A given zone may succeed after a prior failure.
  1422  				task.clearMachineAZFailures(machine)
  1423  			}
  1424  		}
  1425  		if retrying {
  1426  			retryMsg = fmt.Sprintf(
  1427  				"failed to start machine %s (%s), retrying in %v (%d more attempts)",
  1428  				machine, err.Error(), task.retryStartInstanceStrategy.retryDelay, attemptsLeft,
  1429  			)
  1430  			task.logger.Warningf("%s", retryMsg)
  1431  			attemptsLeft--
  1432  		}
  1433  
  1434  		if err3 := machine.SetInstanceStatus(status.Provisioning, retryMsg, nil); err3 != nil {
  1435  			task.logger.Warningf("failed to set instance status: %v", err3)
  1436  		}
  1437  
  1438  		select {
  1439  		case <-task.catacomb.Dying():
  1440  			return task.catacomb.ErrDying()
  1441  		case <-time.After(task.retryStartInstanceStrategy.retryDelay):
  1442  		}
  1443  	}
  1444  
  1445  	networkConfig := params.NetworkConfigFromInterfaceInfo(result.NetworkInfo)
  1446  	volumes := volumesToAPIServer(result.Volumes)
  1447  	volumeNameToAttachmentInfo := volumeAttachmentsToAPIServer(result.VolumeAttachments)
  1448  	instanceID := result.Instance.Id()
  1449  
  1450  	// Gather the charm LXD profile names, including the lxd profile names from
  1451  	// the container brokers.
  1452  	charmLXDProfiles, err := task.gatherCharmLXDProfiles(
  1453  		string(instanceID), machine.Tag().Id(), startInstanceParams.CharmLXDProfiles)
  1454  	if err != nil {
  1455  		return errors.Trace(err)
  1456  	}
  1457  
  1458  	if err := machine.SetInstanceInfo(
  1459  		instanceID,
  1460  		result.DisplayName,
  1461  		startInstanceParams.InstanceConfig.MachineNonce,
  1462  		result.Hardware,
  1463  		networkConfig,
  1464  		volumes,
  1465  		volumeNameToAttachmentInfo,
  1466  		charmLXDProfiles,
  1467  	); err != nil {
  1468  		// We need to stop the instance right away here: set the error status and go on.
  1469  		if err2 := task.setErrorStatus("cannot register instance for machine %v: %v", machine, err); err2 != nil {
  1470  			task.logger.Errorf("%v", errors.Annotate(err2, "setting machine status"))
  1471  		}
  1472  		if err2 := task.broker.StopInstances(ctx, instanceID); err2 != nil {
  1473  			task.logger.Errorf("%v", errors.Annotate(err2, "after failing to set instance info"))
  1474  		}
  1475  		return errors.Annotate(err, "setting instance info")
  1476  	}
  1477  
  1478  	task.logger.Infof(
  1479  		"started machine %s as instance %s with hardware %q, network config %+v, "+
  1480  			"volumes %v, volume attachments %v, subnets to zones %v, lxd profiles %v",
  1481  		machine,
  1482  		instanceID,
  1483  		result.Hardware,
  1484  		networkConfig,
  1485  		volumes,
  1486  		volumeNameToAttachmentInfo,
  1487  		startInstanceParams.SubnetsToZones,
  1488  		startInstanceParams.CharmLXDProfiles,
  1489  	)
  1490  	return nil
  1491  }
  1492  
  1493  // setupToStartMachine gathers the necessary information, based on the
  1494  // specified machine and its provisioning info result, to create the
  1495  // StartInstanceParams used by doStartMachine.
  1496  func (task *provisionerTask) setupToStartMachine(
  1497  	machine apiprovisioner.MachineProvisioner, version *version.Number, pInfoResult params.ProvisioningInfoResult,
  1498  ) (environs.StartInstanceParams, error) {
  1499  	// Check that we have a result.
  1500  	// We should never have an empty result without an error,
  1501  	// but we guard for that conservatively.
  1502  	if pInfoResult.Error != nil {
  1503  		return environs.StartInstanceParams{}, *pInfoResult.Error
  1504  	}
  1505  	pInfo := pInfoResult.Result
  1506  	if pInfo == nil {
  1507  		return environs.StartInstanceParams{}, errors.Errorf("no provisioning info for machine %q", machine.Id())
  1508  	}
  1509  
  1510  	instanceCfg, err := task.constructInstanceConfig(machine, task.auth, pInfo)
  1511  	if err != nil {
  1512  		return environs.StartInstanceParams{}, errors.Annotatef(err, "creating instance config for machine %q", machine)
  1513  	}
  1514  
  1515  	// We default to amd64 unless otherwise specified.
  1516  	agentArch := arch.DefaultArchitecture
  1517  	if pInfo.Constraints.Arch != nil {
  1518  		agentArch = *pInfo.Constraints.Arch
  1519  	}
  1520  
  1521  	possibleTools, err := task.toolsFinder.FindTools(*version, pInfo.Base.Name, agentArch)
  1522  	if err != nil {
  1523  		return environs.StartInstanceParams{}, errors.Annotatef(err, "finding agent binaries for machine %q", machine)
  1524  	}
  1525  
  1526  	startInstanceParams, err := task.constructStartInstanceParams(
  1527  		task.controllerUUID,
  1528  		machine,
  1529  		instanceCfg,
  1530  		pInfo,
  1531  		possibleTools,
  1532  	)
  1533  	if err != nil {
  1534  		return environs.StartInstanceParams{}, errors.Annotatef(err, "constructing params for machine %q", machine)
  1535  	}
  1536  
  1537  	return startInstanceParams, nil
  1538  }
  1539  
  1540  // populateExcludedMachines translates the results of DeriveAvailabilityZones
  1541  // into availabilityZoneMachines.ExcludedMachineIds, so that the machine is not
  1542  // started in any zone outside the derived set.
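        // For example (illustrative zone names): if DeriveAvailabilityZones returns
        // only "az1" while the model tracks "az1", "az2" and "az3", the machine ID is
        // added to ExcludedMachineIds for "az2" and "az3", so that later zone
        // selection never places this machine there.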
  1543  func (task *provisionerTask) populateExcludedMachines(ctx context.ProviderCallContext, machineId string, startInstanceParams environs.StartInstanceParams) error {
  1544  	zonedEnv, ok := task.broker.(providercommon.ZonedEnviron)
  1545  	if !ok {
  1546  		return nil
  1547  	}
  1548  	derivedZones, err := zonedEnv.DeriveAvailabilityZones(ctx, startInstanceParams)
  1549  	if err != nil {
  1550  		return errors.Trace(err)
  1551  	}
  1552  	if len(derivedZones) == 0 {
  1553  		return nil
  1554  	}
  1555  	task.machinesMutex.Lock()
  1556  	defer task.machinesMutex.Unlock()
  1557  	useZones := set.NewStrings(derivedZones...)
  1558  	for _, zoneMachines := range task.availabilityZoneMachines {
  1559  		if !useZones.Contains(zoneMachines.ZoneName) {
  1560  			zoneMachines.ExcludedMachineIds.Add(machineId)
  1561  		}
  1562  	}
  1563  	return nil
  1564  }
  1565  
  1566  // gatherCharmLXDProfiles consumes the charm's LXD profiles from the different
  1567  // sources. This includes getting the information from the broker.
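        // For container machines, the broker is consulted (when it implements
        // container.LXDProfileNameRetriever) for the profile names currently applied
        // to the instance, and the result is passed through lxdprofile.LXDProfileNames;
        // otherwise the supplied machineProfiles are returned unchanged.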
  1568  func (task *provisionerTask) gatherCharmLXDProfiles(
  1569  	instanceID, machineTag string, machineProfiles []string,
  1570  ) ([]string, error) {
  1571  	if !names.IsContainerMachine(machineTag) {
  1572  		return machineProfiles, nil
  1573  	}
  1574  
  1575  	manager, ok := task.broker.(container.LXDProfileNameRetriever)
  1576  	if !ok {
  1577  		task.logger.Tracef("failed to gather profile names, broker didn't conform to LXDProfileNameRetriever")
  1578  		return machineProfiles, nil
  1579  	}
  1580  
  1581  	profileNames, err := manager.LXDProfileNames(instanceID)
  1582  	if err != nil {
  1583  		return nil, errors.Trace(err)
  1584  	}
  1585  
  1586  	return lxdprofile.LXDProfileNames(profileNames), nil
  1587  }
  1588  
  1589  // markMachineFailedInAZ moves the machine in the given zone from MachineIds to
  1590  // FailedMachineIds in availabilityZoneMachines, and reports whether there are
  1591  // any availability zones that have not yet failed for the specified machine.
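        // For example (illustrative): if a machine has already failed in "az1" and
        // "az2", but "az3" still matches its constraints and does not exclude it,
        // this returns true and the caller may retry in a fresh zone.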
  1592  func (task *provisionerTask) markMachineFailedInAZ(machine apiprovisioner.MachineProvisioner, zone string,
  1593  	cons constraints.Value) (bool, error) {
  1594  	if zone == "" {
  1595  		return false, errors.New("no zone provided")
  1596  	}
  1597  	task.machinesMutex.Lock()
  1598  	defer task.machinesMutex.Unlock()
  1599  	for _, zoneMachines := range task.availabilityZoneMachines {
  1600  		if zone == zoneMachines.ZoneName {
  1601  			zoneMachines.MachineIds.Remove(machine.Id())
  1602  			zoneMachines.FailedMachineIds.Add(machine.Id())
  1603  			break
  1604  		}
  1605  	}
  1606  
  1607  	// Check if there are any zones left to try (that also match constraints).
  1608  	for _, zoneMachines := range task.availabilityZoneMachines {
  1609  		if zoneMachines.MatchesConstraints(cons) &&
  1610  			!zoneMachines.FailedMachineIds.Contains(machine.Id()) &&
  1611  			!zoneMachines.ExcludedMachineIds.Contains(machine.Id()) {
  1612  			return true, nil
  1613  		}
  1614  	}
  1615  	return false, nil
  1616  }
  1617  
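        // clearMachineAZFailures removes the machine from FailedMachineIds in every
        // availability zone, making all zones eligible again for the next round of
        // start attempts.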
  1618  func (task *provisionerTask) clearMachineAZFailures(machine apiprovisioner.MachineProvisioner) {
  1619  	task.machinesMutex.Lock()
  1620  	defer task.machinesMutex.Unlock()
  1621  	for _, zoneMachines := range task.availabilityZoneMachines {
  1622  		zoneMachines.FailedMachineIds.Remove(machine.Id())
  1623  	}
  1624  }
  1625  
  1626  // removeMachineFromAZMap removes the specified machine from availabilityZoneMachines.
  1627  // It is assumed this is called when the machine is being deleted from state,
  1628  // or has failed provisioning.
  1629  func (task *provisionerTask) removeMachineFromAZMap(machine apiprovisioner.MachineProvisioner) {
  1630  	machineId := machine.Id()
  1631  	task.machinesMutex.Lock()
  1632  	defer task.machinesMutex.Unlock()
  1633  	for _, zoneMachines := range task.availabilityZoneMachines {
  1634  		zoneMachines.MachineIds.Remove(machineId)
  1635  		zoneMachines.FailedMachineIds.Remove(machineId)
  1636  	}
  1637  }
  1638  
  1639  // subnetZonesFromNetworkTopology denormalises the topology passed from the API
  1640  // server into a slice of subnet-to-AZ-list maps, one per listed space.
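        // For example (illustrative values only), a topology with
        //   SpaceSubnets: {"alpha": ["sub-1"], "beta": ["sub-2", "sub-3"]}
        //   SubnetAZs:    {"sub-1": ["az1"], "sub-2": ["az1", "az2"], "sub-3": ["az2"]}
        // yields, ordered by space name:
        //   []map[network.Id][]string{
        //       {"sub-1": {"az1"}},
        //       {"sub-2": {"az1", "az2"}, "sub-3": {"az2"}},
        //   }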
  1641  func subnetZonesFromNetworkTopology(topology params.ProvisioningNetworkTopology) []map[network.Id][]string {
  1642  	if len(topology.SpaceSubnets) == 0 {
  1643  		return nil
  1644  	}
  1645  
  1646  	// Ensure consistent ordering of the result by sorting the space names.
  1647  	spaceNames := make([]string, 0, len(topology.SpaceSubnets))
  1648  	for spaceName := range topology.SpaceSubnets {
  1649  		spaceNames = append(spaceNames, spaceName)
  1650  	}
  1651  	sort.Strings(spaceNames)
  1652  
  1653  	subnetsToZones := make([]map[network.Id][]string, 0, len(spaceNames))
  1654  	for _, spaceName := range spaceNames {
  1655  		subnetAZs := make(map[network.Id][]string)
  1656  		for _, subnet := range topology.SpaceSubnets[spaceName] {
  1657  			subnetAZs[network.Id(subnet)] = topology.SubnetAZs[subnet]
  1658  		}
  1659  		subnetsToZones = append(subnetsToZones, subnetAZs)
  1660  	}
  1661  	return subnetsToZones
  1662  }
  1663  
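        // volumesToAPIServer converts the provider-reported storage.Volume values
        // into the params.Volume wire format expected by the API server.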
  1664  func volumesToAPIServer(volumes []storage.Volume) []params.Volume {
  1665  	result := make([]params.Volume, len(volumes))
  1666  	for i, v := range volumes {
  1667  		result[i] = params.Volume{
  1668  			VolumeTag: v.Tag.String(),
  1669  			Info: params.VolumeInfo{
  1670  				VolumeId:   v.VolumeId,
  1671  				HardwareId: v.HardwareId,
  1672  				WWN:        v.WWN, // pool
  1673  				Size:       v.Size,
  1674  				Persistent: v.Persistent,
  1675  			},
  1676  		}
  1677  	}
  1678  	return result
  1679  }
  1680  
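        // volumeAttachmentsToAPIServer converts the provider-reported volume
        // attachments into a map of volume tag to params.VolumeAttachmentInfo
        // for the API server.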
  1681  func volumeAttachmentsToAPIServer(attachments []storage.VolumeAttachment) map[string]params.VolumeAttachmentInfo {
  1682  	result := make(map[string]params.VolumeAttachmentInfo)
  1683  	for _, a := range attachments {
  1684  
  1685  		// Volume attachment plans are used in the OCI provider where actions
  1686  		// are required on the instance itself in order to complete attachments
  1687  		// of SCSI volumes.
  1688  		// TODO (manadart 2020-02-04): I believe this code path to be untested.
  1689  		var planInfo *params.VolumeAttachmentPlanInfo
  1690  		if a.PlanInfo != nil {
  1691  			planInfo = &params.VolumeAttachmentPlanInfo{
  1692  				DeviceType:       a.PlanInfo.DeviceType,
  1693  				DeviceAttributes: a.PlanInfo.DeviceAttributes,
  1694  			}
  1695  		}
  1696  
  1697  		result[a.Volume.String()] = params.VolumeAttachmentInfo{
  1698  			DeviceName: a.DeviceName,
  1699  			DeviceLink: a.DeviceLink,
  1700  			BusAddress: a.BusAddress,
  1701  			ReadOnly:   a.ReadOnly,
  1702  			PlanInfo:   planInfo,
  1703  		}
  1704  	}
  1705  	return result
  1706  }