github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/provisioner/provisioner.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner
     5  
     6  import (
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/names/v5"
    12  	"github.com/juju/worker/v3"
    13  	"github.com/juju/worker/v3/catacomb"
    14  
    15  	"github.com/juju/juju/agent"
    16  	apiprovisioner "github.com/juju/juju/api/agent/provisioner"
    17  	"github.com/juju/juju/controller/authentication"
    18  	"github.com/juju/juju/core/instance"
    19  	"github.com/juju/juju/core/watcher"
    20  	"github.com/juju/juju/environs"
    21  	"github.com/juju/juju/environs/config"
    22  	"github.com/juju/juju/worker/common"
    23  )
    24  
    25  // Ensure our structs implement the required Provisioner interface.
    26  var _ Provisioner = (*environProvisioner)(nil)
    27  var _ Provisioner = (*containerProvisioner)(nil)
    28  
    29  var (
    30  	retryStrategyDelay = 10 * time.Second
    31  	retryStrategyCount = 10
    32  )
    33  
    34  // Provisioner represents a running provisioner worker.
    35  type Provisioner interface {
    36  	worker.Worker
    37  	getMachineWatcher() (watcher.StringsWatcher, error)
    38  	getRetryWatcher() (watcher.NotifyWatcher, error)
    39  }
    40  
    41  // environProvisioner represents a running provisioning worker for machine nodes
    42  // belonging to an environment.
    43  type environProvisioner struct {
    44  	provisioner
    45  	environ        environs.Environ
    46  	configObserver configObserver
    47  }
    48  
    49  // containerProvisioner represents a running provisioning worker for containers
    50  // hosted on a machine.
    51  type containerProvisioner struct {
    52  	provisioner
    53  	containerType  instance.ContainerType
    54  	machine        apiprovisioner.MachineProvisioner
    55  	configObserver configObserver
    56  }
    57  
    58  // provisioner providers common behaviour for a running provisioning worker.
    59  type provisioner struct {
    60  	Provisioner
    61  	st                      *apiprovisioner.State
    62  	agentConfig             agent.Config
    63  	logger                  Logger
    64  	broker                  environs.InstanceBroker
    65  	distributionGroupFinder DistributionGroupFinder
    66  	toolsFinder             ToolsFinder
    67  	catacomb                catacomb.Catacomb
    68  	callContextFunc         common.CloudCallContextFunc
    69  }
    70  
    71  // RetryStrategy defines the retry behavior when encountering a retryable
    72  // error during provisioning.
    73  //
    74  // TODO(katco): 2016-08-09: lp:1611427
    75  type RetryStrategy struct {
    76  	retryDelay time.Duration
    77  	retryCount int
    78  }
    79  
    80  // NewRetryStrategy returns a new retry strategy with the specified delay and
    81  // count for use with retryable provisioning errors.
    82  func NewRetryStrategy(delay time.Duration, count int) RetryStrategy {
    83  	return RetryStrategy{
    84  		retryDelay: delay,
    85  		retryCount: count,
    86  	}
    87  }
    88  
    89  // configObserver is implemented so that tests can see when the environment
    90  // configuration changes.
    91  // The catacomb is set in export_test to the provider's member.
    92  // This is used to prevent notify from blocking a provisioner that has had its
    93  // Kill method invoked.
    94  type configObserver struct {
    95  	sync.Mutex
    96  	observer chan<- *config.Config
    97  	catacomb *catacomb.Catacomb
    98  }
    99  
   100  // notify notifies the observer of a configuration change.
   101  func (o *configObserver) notify(cfg *config.Config) {
   102  	o.Lock()
   103  	if o.observer != nil {
   104  		select {
   105  		case o.observer <- cfg:
   106  		case <-o.catacomb.Dying():
   107  		}
   108  	}
   109  	o.Unlock()
   110  }
   111  
   112  // Kill implements worker.Worker.Kill.
   113  func (p *provisioner) Kill() {
   114  	p.catacomb.Kill(nil)
   115  }
   116  
   117  // Wait implements worker.Worker.Wait.
   118  func (p *provisioner) Wait() error {
   119  	return p.catacomb.Wait()
   120  }
   121  
   122  // getToolsFinder returns a ToolsFinder for the provided State.
   123  // This exists for mocking.
   124  var getToolsFinder = func(st *apiprovisioner.State) ToolsFinder {
   125  	return st
   126  }
   127  
   128  // getDistributionGroupFinder returns a DistributionGroupFinder
   129  // for the provided State. This exists for mocking.
   130  var getDistributionGroupFinder = func(st *apiprovisioner.State) DistributionGroupFinder {
   131  	return st
   132  }
   133  
   134  // getStartTask creates a new worker for the provisioner,
   135  func (p *provisioner) getStartTask(harvestMode config.HarvestMode, workerCount int) (ProvisionerTask, error) {
   136  	auth, err := authentication.NewAPIAuthenticator(p.st)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	// Start responding to changes in machines, and to any further updates
   141  	// to the environment config.
   142  	machineWatcher, err := p.getMachineWatcher()
   143  	if err != nil {
   144  		return nil, err
   145  	}
   146  	retryWatcher, err := p.getRetryWatcher()
   147  	if err != nil && !errors.IsNotImplemented(err) {
   148  		return nil, err
   149  	}
   150  	hostTag := p.agentConfig.Tag()
   151  	if kind := hostTag.Kind(); kind != names.ControllerAgentTagKind && kind != names.MachineTagKind {
   152  		return nil, errors.Errorf("agent's tag is not a machine or controller agent tag, got %T", hostTag)
   153  	}
   154  
   155  	modelCfg, err := p.st.ModelConfig()
   156  	if err != nil {
   157  		return nil, errors.Annotate(err, "could not retrieve the model config.")
   158  	}
   159  
   160  	controllerCfg, err := p.st.ControllerConfig()
   161  	if err != nil {
   162  		return nil, errors.Annotate(err, "could not retrieve the controller config.")
   163  	}
   164  
   165  	task, err := NewProvisionerTask(TaskConfig{
   166  		ControllerUUID:             controllerCfg.ControllerUUID(),
   167  		HostTag:                    hostTag,
   168  		Logger:                     p.logger,
   169  		HarvestMode:                harvestMode,
   170  		TaskAPI:                    p.st,
   171  		DistributionGroupFinder:    p.distributionGroupFinder,
   172  		ToolsFinder:                p.toolsFinder,
   173  		MachineWatcher:             machineWatcher,
   174  		RetryWatcher:               retryWatcher,
   175  		Broker:                     p.broker,
   176  		Auth:                       auth,
   177  		ImageStream:                modelCfg.ImageStream(),
   178  		RetryStartInstanceStrategy: RetryStrategy{retryDelay: retryStrategyDelay, retryCount: retryStrategyCount},
   179  		CloudCallContextFunc:       p.callContextFunc,
   180  		NumProvisionWorkers:        workerCount, // event callback is currently only being used by tests
   181  	})
   182  	if err != nil {
   183  		return nil, errors.Trace(err)
   184  	}
   185  	return task, nil
   186  }
   187  
   188  // NewEnvironProvisioner returns a new Provisioner for an environment.
   189  // When new machines are added to the state, it allocates instances
   190  // from the environment and allocates them to the new machines.
   191  func NewEnvironProvisioner(
   192  	st *apiprovisioner.State,
   193  	agentConfig agent.Config,
   194  	logger Logger,
   195  	environ environs.Environ,
   196  	credentialAPI common.CredentialAPI,
   197  ) (Provisioner, error) {
   198  	if logger == nil {
   199  		return nil, errors.NotValidf("missing logger")
   200  	}
   201  	p := &environProvisioner{
   202  		provisioner: provisioner{
   203  			st:                      st,
   204  			agentConfig:             agentConfig,
   205  			logger:                  logger,
   206  			toolsFinder:             getToolsFinder(st),
   207  			distributionGroupFinder: getDistributionGroupFinder(st),
   208  			callContextFunc:         common.NewCloudCallContextFunc(credentialAPI),
   209  		},
   210  		environ: environ,
   211  	}
   212  	p.Provisioner = p
   213  	p.broker = environ
   214  	logger.Tracef("Starting environ provisioner for %q", p.agentConfig.Tag())
   215  
   216  	err := catacomb.Invoke(catacomb.Plan{
   217  		Site: &p.catacomb,
   218  		Work: p.loop,
   219  	})
   220  	if err != nil {
   221  		return nil, errors.Trace(err)
   222  	}
   223  	return p, nil
   224  }
   225  
   226  func (p *environProvisioner) loop() error {
   227  	// TODO(mjs channeling axw) - It would be better if there were
   228  	// APIs to watch and fetch provisioner specific config instead of
   229  	// watcher for all changes to model config. This would avoid the
   230  	// need for a full model config.
   231  	var modelConfigChanges <-chan struct{}
   232  	modelWatcher, err := p.st.WatchForModelConfigChanges()
   233  	if err != nil {
   234  		return loggedErrorStack(p.logger, errors.Trace(err))
   235  	}
   236  	if err := p.catacomb.Add(modelWatcher); err != nil {
   237  		return errors.Trace(err)
   238  	}
   239  	modelConfigChanges = modelWatcher.Changes()
   240  
   241  	modelConfig := p.environ.Config()
   242  	p.configObserver.notify(modelConfig)
   243  	harvestMode := modelConfig.ProvisionerHarvestMode()
   244  	workerCount := modelConfig.NumProvisionWorkers()
   245  	task, err := p.getStartTask(harvestMode, workerCount)
   246  	if err != nil {
   247  		return loggedErrorStack(p.logger, errors.Trace(err))
   248  	}
   249  	if err := p.catacomb.Add(task); err != nil {
   250  		return errors.Trace(err)
   251  	}
   252  
   253  	for {
   254  		select {
   255  		case <-p.catacomb.Dying():
   256  			return p.catacomb.ErrDying()
   257  		case _, ok := <-modelConfigChanges:
   258  			if !ok {
   259  				return errors.New("model configuration watcher closed")
   260  			}
   261  			modelConfig, err := p.st.ModelConfig()
   262  			if err != nil {
   263  				return errors.Annotate(err, "cannot load model configuration")
   264  			}
   265  			if err := p.setConfig(modelConfig); err != nil {
   266  				return errors.Annotate(err, "loaded invalid model configuration")
   267  			}
   268  			task.SetHarvestMode(modelConfig.ProvisionerHarvestMode())
   269  			task.SetNumProvisionWorkers(modelConfig.NumProvisionWorkers())
   270  		}
   271  	}
   272  }
   273  
   274  func (p *environProvisioner) getMachineWatcher() (watcher.StringsWatcher, error) {
   275  	return p.st.WatchModelMachines()
   276  }
   277  
   278  func (p *environProvisioner) getRetryWatcher() (watcher.NotifyWatcher, error) {
   279  	return p.st.WatchMachineErrorRetry()
   280  }
   281  
   282  // setConfig updates the environment configuration and notifies
   283  // the config observer.
   284  func (p *environProvisioner) setConfig(modelConfig *config.Config) error {
   285  	if err := p.environ.SetConfig(modelConfig); err != nil {
   286  		return errors.Trace(err)
   287  	}
   288  	p.configObserver.notify(modelConfig)
   289  	return nil
   290  }
   291  
   292  // NewContainerProvisioner returns a new Provisioner. When new machines
   293  // are added to the state, it allocates instances from the environment
   294  // and allocates them to the new machines.
   295  func NewContainerProvisioner(
   296  	containerType instance.ContainerType,
   297  	st *apiprovisioner.State,
   298  	logger Logger,
   299  	agentConfig agent.Config,
   300  	broker environs.InstanceBroker,
   301  	toolsFinder ToolsFinder,
   302  	distributionGroupFinder DistributionGroupFinder,
   303  	credentialAPI common.CredentialAPI,
   304  ) (Provisioner, error) {
   305  	p := &containerProvisioner{
   306  		provisioner: provisioner{
   307  			st:                      st,
   308  			agentConfig:             agentConfig,
   309  			logger:                  logger,
   310  			broker:                  broker,
   311  			toolsFinder:             toolsFinder,
   312  			distributionGroupFinder: distributionGroupFinder,
   313  			callContextFunc:         common.NewCloudCallContextFunc(credentialAPI),
   314  		},
   315  		containerType: containerType,
   316  	}
   317  	p.Provisioner = p
   318  	logger.Tracef("Starting %s provisioner for %q", p.containerType, p.agentConfig.Tag())
   319  
   320  	err := catacomb.Invoke(catacomb.Plan{
   321  		Site: &p.catacomb,
   322  		Work: p.loop,
   323  	})
   324  	if err != nil {
   325  		return nil, errors.Trace(err)
   326  	}
   327  	return p, nil
   328  }
   329  
   330  func (p *containerProvisioner) loop() error {
   331  	modelWatcher, err := p.st.WatchForModelConfigChanges()
   332  	if err != nil {
   333  		return errors.Trace(err)
   334  	}
   335  	if err := p.catacomb.Add(modelWatcher); err != nil {
   336  		return errors.Trace(err)
   337  	}
   338  
   339  	modelConfig, err := p.st.ModelConfig()
   340  	if err != nil {
   341  		return errors.Trace(err)
   342  	}
   343  	p.configObserver.notify(modelConfig)
   344  	harvestMode := modelConfig.ProvisionerHarvestMode()
   345  	workerCount := modelConfig.NumContainerProvisionWorkers()
   346  
   347  	task, err := p.getStartTask(harvestMode, workerCount)
   348  	if err != nil {
   349  		return loggedErrorStack(p.logger, errors.Trace(err))
   350  	}
   351  	if err := p.catacomb.Add(task); err != nil {
   352  		return errors.Trace(err)
   353  	}
   354  
   355  	for {
   356  		select {
   357  		case <-p.catacomb.Dying():
   358  			return p.catacomb.ErrDying()
   359  		case _, ok := <-modelWatcher.Changes():
   360  			if !ok {
   361  				return errors.New("model configuration watch closed")
   362  			}
   363  			modelConfig, err := p.st.ModelConfig()
   364  			if err != nil {
   365  				return errors.Annotate(err, "cannot load model configuration")
   366  			}
   367  			p.configObserver.notify(modelConfig)
   368  			task.SetHarvestMode(modelConfig.ProvisionerHarvestMode())
   369  			task.SetNumProvisionWorkers(modelConfig.NumContainerProvisionWorkers())
   370  		}
   371  	}
   372  }
   373  
   374  func (p *containerProvisioner) getMachine() (apiprovisioner.MachineProvisioner, error) {
   375  	if p.machine == nil {
   376  		tag := p.agentConfig.Tag()
   377  		machineTag, ok := tag.(names.MachineTag)
   378  		if !ok {
   379  			return nil, errors.Errorf("expected names.MachineTag, got %T", tag)
   380  		}
   381  		result, err := p.st.Machines(machineTag)
   382  		if err != nil {
   383  			p.logger.Errorf("error retrieving %s from state", machineTag)
   384  			return nil, err
   385  		}
   386  		if result[0].Err != nil {
   387  			p.logger.Errorf("%s is not in state", machineTag)
   388  			return nil, err
   389  		}
   390  		p.machine = result[0].Machine
   391  	}
   392  	return p.machine, nil
   393  }
   394  
   395  func (p *containerProvisioner) getMachineWatcher() (watcher.StringsWatcher, error) {
   396  	machine, err := p.getMachine()
   397  	if err != nil {
   398  		return nil, err
   399  	}
   400  	return machine.WatchContainers(p.containerType)
   401  }
   402  
   403  func (p *containerProvisioner) getRetryWatcher() (watcher.NotifyWatcher, error) {
   404  	return nil, errors.NotImplementedf("getRetryWatcher")
   405  }