github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/provisioner/provisioner.go (about) 1 // Copyright 2012, 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package provisioner 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/names/v5" 12 "github.com/juju/worker/v3" 13 "github.com/juju/worker/v3/catacomb" 14 15 "github.com/juju/juju/agent" 16 apiprovisioner "github.com/juju/juju/api/agent/provisioner" 17 "github.com/juju/juju/controller/authentication" 18 "github.com/juju/juju/core/instance" 19 "github.com/juju/juju/core/watcher" 20 "github.com/juju/juju/environs" 21 "github.com/juju/juju/environs/config" 22 "github.com/juju/juju/worker/common" 23 ) 24 25 // Ensure our structs implement the required Provisioner interface. 26 var _ Provisioner = (*environProvisioner)(nil) 27 var _ Provisioner = (*containerProvisioner)(nil) 28 29 var ( 30 retryStrategyDelay = 10 * time.Second 31 retryStrategyCount = 10 32 ) 33 34 // Provisioner represents a running provisioner worker. 35 type Provisioner interface { 36 worker.Worker 37 getMachineWatcher() (watcher.StringsWatcher, error) 38 getRetryWatcher() (watcher.NotifyWatcher, error) 39 } 40 41 // environProvisioner represents a running provisioning worker for machine nodes 42 // belonging to an environment. 43 type environProvisioner struct { 44 provisioner 45 environ environs.Environ 46 configObserver configObserver 47 } 48 49 // containerProvisioner represents a running provisioning worker for containers 50 // hosted on a machine. 51 type containerProvisioner struct { 52 provisioner 53 containerType instance.ContainerType 54 machine apiprovisioner.MachineProvisioner 55 configObserver configObserver 56 } 57 58 // provisioner providers common behaviour for a running provisioning worker. 59 type provisioner struct { 60 Provisioner 61 st *apiprovisioner.State 62 agentConfig agent.Config 63 logger Logger 64 broker environs.InstanceBroker 65 distributionGroupFinder DistributionGroupFinder 66 toolsFinder ToolsFinder 67 catacomb catacomb.Catacomb 68 callContextFunc common.CloudCallContextFunc 69 } 70 71 // RetryStrategy defines the retry behavior when encountering a retryable 72 // error during provisioning. 73 // 74 // TODO(katco): 2016-08-09: lp:1611427 75 type RetryStrategy struct { 76 retryDelay time.Duration 77 retryCount int 78 } 79 80 // NewRetryStrategy returns a new retry strategy with the specified delay and 81 // count for use with retryable provisioning errors. 82 func NewRetryStrategy(delay time.Duration, count int) RetryStrategy { 83 return RetryStrategy{ 84 retryDelay: delay, 85 retryCount: count, 86 } 87 } 88 89 // configObserver is implemented so that tests can see when the environment 90 // configuration changes. 91 // The catacomb is set in export_test to the provider's member. 92 // This is used to prevent notify from blocking a provisioner that has had its 93 // Kill method invoked. 94 type configObserver struct { 95 sync.Mutex 96 observer chan<- *config.Config 97 catacomb *catacomb.Catacomb 98 } 99 100 // notify notifies the observer of a configuration change. 101 func (o *configObserver) notify(cfg *config.Config) { 102 o.Lock() 103 if o.observer != nil { 104 select { 105 case o.observer <- cfg: 106 case <-o.catacomb.Dying(): 107 } 108 } 109 o.Unlock() 110 } 111 112 // Kill implements worker.Worker.Kill. 113 func (p *provisioner) Kill() { 114 p.catacomb.Kill(nil) 115 } 116 117 // Wait implements worker.Worker.Wait. 118 func (p *provisioner) Wait() error { 119 return p.catacomb.Wait() 120 } 121 122 // getToolsFinder returns a ToolsFinder for the provided State. 123 // This exists for mocking. 124 var getToolsFinder = func(st *apiprovisioner.State) ToolsFinder { 125 return st 126 } 127 128 // getDistributionGroupFinder returns a DistributionGroupFinder 129 // for the provided State. This exists for mocking. 130 var getDistributionGroupFinder = func(st *apiprovisioner.State) DistributionGroupFinder { 131 return st 132 } 133 134 // getStartTask creates a new worker for the provisioner, 135 func (p *provisioner) getStartTask(harvestMode config.HarvestMode, workerCount int) (ProvisionerTask, error) { 136 auth, err := authentication.NewAPIAuthenticator(p.st) 137 if err != nil { 138 return nil, err 139 } 140 // Start responding to changes in machines, and to any further updates 141 // to the environment config. 142 machineWatcher, err := p.getMachineWatcher() 143 if err != nil { 144 return nil, err 145 } 146 retryWatcher, err := p.getRetryWatcher() 147 if err != nil && !errors.IsNotImplemented(err) { 148 return nil, err 149 } 150 hostTag := p.agentConfig.Tag() 151 if kind := hostTag.Kind(); kind != names.ControllerAgentTagKind && kind != names.MachineTagKind { 152 return nil, errors.Errorf("agent's tag is not a machine or controller agent tag, got %T", hostTag) 153 } 154 155 modelCfg, err := p.st.ModelConfig() 156 if err != nil { 157 return nil, errors.Annotate(err, "could not retrieve the model config.") 158 } 159 160 controllerCfg, err := p.st.ControllerConfig() 161 if err != nil { 162 return nil, errors.Annotate(err, "could not retrieve the controller config.") 163 } 164 165 task, err := NewProvisionerTask(TaskConfig{ 166 ControllerUUID: controllerCfg.ControllerUUID(), 167 HostTag: hostTag, 168 Logger: p.logger, 169 HarvestMode: harvestMode, 170 TaskAPI: p.st, 171 DistributionGroupFinder: p.distributionGroupFinder, 172 ToolsFinder: p.toolsFinder, 173 MachineWatcher: machineWatcher, 174 RetryWatcher: retryWatcher, 175 Broker: p.broker, 176 Auth: auth, 177 ImageStream: modelCfg.ImageStream(), 178 RetryStartInstanceStrategy: RetryStrategy{retryDelay: retryStrategyDelay, retryCount: retryStrategyCount}, 179 CloudCallContextFunc: p.callContextFunc, 180 NumProvisionWorkers: workerCount, // event callback is currently only being used by tests 181 }) 182 if err != nil { 183 return nil, errors.Trace(err) 184 } 185 return task, nil 186 } 187 188 // NewEnvironProvisioner returns a new Provisioner for an environment. 189 // When new machines are added to the state, it allocates instances 190 // from the environment and allocates them to the new machines. 191 func NewEnvironProvisioner( 192 st *apiprovisioner.State, 193 agentConfig agent.Config, 194 logger Logger, 195 environ environs.Environ, 196 credentialAPI common.CredentialAPI, 197 ) (Provisioner, error) { 198 if logger == nil { 199 return nil, errors.NotValidf("missing logger") 200 } 201 p := &environProvisioner{ 202 provisioner: provisioner{ 203 st: st, 204 agentConfig: agentConfig, 205 logger: logger, 206 toolsFinder: getToolsFinder(st), 207 distributionGroupFinder: getDistributionGroupFinder(st), 208 callContextFunc: common.NewCloudCallContextFunc(credentialAPI), 209 }, 210 environ: environ, 211 } 212 p.Provisioner = p 213 p.broker = environ 214 logger.Tracef("Starting environ provisioner for %q", p.agentConfig.Tag()) 215 216 err := catacomb.Invoke(catacomb.Plan{ 217 Site: &p.catacomb, 218 Work: p.loop, 219 }) 220 if err != nil { 221 return nil, errors.Trace(err) 222 } 223 return p, nil 224 } 225 226 func (p *environProvisioner) loop() error { 227 // TODO(mjs channeling axw) - It would be better if there were 228 // APIs to watch and fetch provisioner specific config instead of 229 // watcher for all changes to model config. This would avoid the 230 // need for a full model config. 231 var modelConfigChanges <-chan struct{} 232 modelWatcher, err := p.st.WatchForModelConfigChanges() 233 if err != nil { 234 return loggedErrorStack(p.logger, errors.Trace(err)) 235 } 236 if err := p.catacomb.Add(modelWatcher); err != nil { 237 return errors.Trace(err) 238 } 239 modelConfigChanges = modelWatcher.Changes() 240 241 modelConfig := p.environ.Config() 242 p.configObserver.notify(modelConfig) 243 harvestMode := modelConfig.ProvisionerHarvestMode() 244 workerCount := modelConfig.NumProvisionWorkers() 245 task, err := p.getStartTask(harvestMode, workerCount) 246 if err != nil { 247 return loggedErrorStack(p.logger, errors.Trace(err)) 248 } 249 if err := p.catacomb.Add(task); err != nil { 250 return errors.Trace(err) 251 } 252 253 for { 254 select { 255 case <-p.catacomb.Dying(): 256 return p.catacomb.ErrDying() 257 case _, ok := <-modelConfigChanges: 258 if !ok { 259 return errors.New("model configuration watcher closed") 260 } 261 modelConfig, err := p.st.ModelConfig() 262 if err != nil { 263 return errors.Annotate(err, "cannot load model configuration") 264 } 265 if err := p.setConfig(modelConfig); err != nil { 266 return errors.Annotate(err, "loaded invalid model configuration") 267 } 268 task.SetHarvestMode(modelConfig.ProvisionerHarvestMode()) 269 task.SetNumProvisionWorkers(modelConfig.NumProvisionWorkers()) 270 } 271 } 272 } 273 274 func (p *environProvisioner) getMachineWatcher() (watcher.StringsWatcher, error) { 275 return p.st.WatchModelMachines() 276 } 277 278 func (p *environProvisioner) getRetryWatcher() (watcher.NotifyWatcher, error) { 279 return p.st.WatchMachineErrorRetry() 280 } 281 282 // setConfig updates the environment configuration and notifies 283 // the config observer. 284 func (p *environProvisioner) setConfig(modelConfig *config.Config) error { 285 if err := p.environ.SetConfig(modelConfig); err != nil { 286 return errors.Trace(err) 287 } 288 p.configObserver.notify(modelConfig) 289 return nil 290 } 291 292 // NewContainerProvisioner returns a new Provisioner. When new machines 293 // are added to the state, it allocates instances from the environment 294 // and allocates them to the new machines. 295 func NewContainerProvisioner( 296 containerType instance.ContainerType, 297 st *apiprovisioner.State, 298 logger Logger, 299 agentConfig agent.Config, 300 broker environs.InstanceBroker, 301 toolsFinder ToolsFinder, 302 distributionGroupFinder DistributionGroupFinder, 303 credentialAPI common.CredentialAPI, 304 ) (Provisioner, error) { 305 p := &containerProvisioner{ 306 provisioner: provisioner{ 307 st: st, 308 agentConfig: agentConfig, 309 logger: logger, 310 broker: broker, 311 toolsFinder: toolsFinder, 312 distributionGroupFinder: distributionGroupFinder, 313 callContextFunc: common.NewCloudCallContextFunc(credentialAPI), 314 }, 315 containerType: containerType, 316 } 317 p.Provisioner = p 318 logger.Tracef("Starting %s provisioner for %q", p.containerType, p.agentConfig.Tag()) 319 320 err := catacomb.Invoke(catacomb.Plan{ 321 Site: &p.catacomb, 322 Work: p.loop, 323 }) 324 if err != nil { 325 return nil, errors.Trace(err) 326 } 327 return p, nil 328 } 329 330 func (p *containerProvisioner) loop() error { 331 modelWatcher, err := p.st.WatchForModelConfigChanges() 332 if err != nil { 333 return errors.Trace(err) 334 } 335 if err := p.catacomb.Add(modelWatcher); err != nil { 336 return errors.Trace(err) 337 } 338 339 modelConfig, err := p.st.ModelConfig() 340 if err != nil { 341 return errors.Trace(err) 342 } 343 p.configObserver.notify(modelConfig) 344 harvestMode := modelConfig.ProvisionerHarvestMode() 345 workerCount := modelConfig.NumContainerProvisionWorkers() 346 347 task, err := p.getStartTask(harvestMode, workerCount) 348 if err != nil { 349 return loggedErrorStack(p.logger, errors.Trace(err)) 350 } 351 if err := p.catacomb.Add(task); err != nil { 352 return errors.Trace(err) 353 } 354 355 for { 356 select { 357 case <-p.catacomb.Dying(): 358 return p.catacomb.ErrDying() 359 case _, ok := <-modelWatcher.Changes(): 360 if !ok { 361 return errors.New("model configuration watch closed") 362 } 363 modelConfig, err := p.st.ModelConfig() 364 if err != nil { 365 return errors.Annotate(err, "cannot load model configuration") 366 } 367 p.configObserver.notify(modelConfig) 368 task.SetHarvestMode(modelConfig.ProvisionerHarvestMode()) 369 task.SetNumProvisionWorkers(modelConfig.NumContainerProvisionWorkers()) 370 } 371 } 372 } 373 374 func (p *containerProvisioner) getMachine() (apiprovisioner.MachineProvisioner, error) { 375 if p.machine == nil { 376 tag := p.agentConfig.Tag() 377 machineTag, ok := tag.(names.MachineTag) 378 if !ok { 379 return nil, errors.Errorf("expected names.MachineTag, got %T", tag) 380 } 381 result, err := p.st.Machines(machineTag) 382 if err != nil { 383 p.logger.Errorf("error retrieving %s from state", machineTag) 384 return nil, err 385 } 386 if result[0].Err != nil { 387 p.logger.Errorf("%s is not in state", machineTag) 388 return nil, err 389 } 390 p.machine = result[0].Machine 391 } 392 return p.machine, nil 393 } 394 395 func (p *containerProvisioner) getMachineWatcher() (watcher.StringsWatcher, error) { 396 machine, err := p.getMachine() 397 if err != nil { 398 return nil, err 399 } 400 return machine.WatchContainers(p.containerType) 401 } 402 403 func (p *containerProvisioner) getRetryWatcher() (watcher.NotifyWatcher, error) { 404 return nil, errors.NotImplementedf("getRetryWatcher") 405 }