github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/caasoperatorprovisioner/worker.go (about) 1 // Copyright 2017 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // This worker is responsible for watching the life cycle of CAAS pod-spec 5 // applications and creating their operator pods (or removing them). Unlike 6 // the caasapplicationprovisioner worker, this worker does not create a new 7 // child worker for every application being monitored. 8 // 9 // Note that the separate caasapplicationprovisioner worker handles CAAS 10 // sidecar applications. 11 12 package caasoperatorprovisioner 13 14 import ( 15 "bytes" 16 "fmt" 17 "strings" 18 "time" 19 20 "github.com/juju/charm/v12" 21 "github.com/juju/clock" 22 "github.com/juju/errors" 23 "github.com/juju/names/v5" 24 "github.com/juju/retry" 25 "github.com/juju/utils/v3" 26 "github.com/juju/worker/v3" 27 "github.com/juju/worker/v3/catacomb" 28 29 "github.com/juju/juju/agent" 30 charmscommon "github.com/juju/juju/api/common/charms" 31 apicaasprovisioner "github.com/juju/juju/api/controller/caasoperatorprovisioner" 32 "github.com/juju/juju/caas" 33 k8sconstants "github.com/juju/juju/caas/kubernetes/provider/constants" 34 "github.com/juju/juju/core/life" 35 "github.com/juju/juju/core/watcher" 36 "github.com/juju/juju/rpc/params" 37 "github.com/juju/juju/storage" 38 ) 39 40 // Logger is here to stop the desire of creating a package level Logger. 41 // Don't do this, instead use the one passed as manifold config. 42 type logger interface{} 43 44 var _ logger = struct{}{} 45 46 // CAASProvisionerFacade exposes CAAS provisioning functionality to a worker. 47 type CAASProvisionerFacade interface { 48 OperatorProvisioningInfo(string) (apicaasprovisioner.OperatorProvisioningInfo, error) 49 WatchApplications() (watcher.StringsWatcher, error) 50 SetPasswords([]apicaasprovisioner.ApplicationPassword) (params.ErrorResults, error) 51 Life(string) (life.Value, error) 52 IssueOperatorCertificate(string) (apicaasprovisioner.OperatorCertificate, error) 53 ApplicationCharmInfo(appName string) (*charmscommon.CharmInfo, error) 54 } 55 56 // Config defines the operation of a Worker. 57 type Config struct { 58 Facade CAASProvisionerFacade 59 OperatorManager caas.ApplicationOperatorManager 60 ModelTag names.ModelTag 61 AgentConfig agent.Config 62 Clock clock.Clock 63 Logger Logger 64 } 65 66 // NewProvisionerWorker starts and returns a new CAAS provisioner worker. 67 func NewProvisionerWorker(config Config) (worker.Worker, error) { 68 p := &provisioner{ 69 provisionerFacade: config.Facade, 70 operatorManager: config.OperatorManager, 71 modelTag: config.ModelTag, 72 agentConfig: config.AgentConfig, 73 clock: config.Clock, 74 logger: config.Logger, 75 } 76 err := catacomb.Invoke(catacomb.Plan{ 77 Site: &p.catacomb, 78 Work: p.loop, 79 }) 80 return p, err 81 } 82 83 type provisioner struct { 84 catacomb catacomb.Catacomb 85 provisionerFacade CAASProvisionerFacade 86 operatorManager caas.ApplicationOperatorManager 87 clock clock.Clock 88 logger Logger 89 90 modelTag names.ModelTag 91 agentConfig agent.Config 92 } 93 94 // Kill is part of the worker.Worker interface. 95 func (p *provisioner) Kill() { 96 p.catacomb.Kill(nil) 97 } 98 99 // Wait is part of the worker.Worker interface. 100 func (p *provisioner) Wait() error { 101 return p.catacomb.Wait() 102 } 103 104 func (p *provisioner) loop() error { 105 // TODO(caas) - this loop should also keep an eye on kubernetes and ensure 106 // that the operator stays up, redeploying it if the pod goes 107 // away. For some runtimes we *could* rely on the the runtime's 108 // features to do this. 109 110 appWatcher, err := p.provisionerFacade.WatchApplications() 111 if err != nil { 112 return errors.Trace(err) 113 } 114 if err := p.catacomb.Add(appWatcher); err != nil { 115 return errors.Trace(err) 116 } 117 118 for { 119 select { 120 case <-p.catacomb.Dying(): 121 return p.catacomb.ErrDying() 122 123 // CAAS applications changed so either create or remove pods as appropriate. 124 case apps, ok := <-appWatcher.Changes(): 125 if !ok { 126 return errors.New("app watcher closed channel") 127 } 128 var newApps []string 129 for _, app := range apps { 130 // Ignore events for v2 charms. 131 format, err := p.charmFormat(app) 132 if errors.IsNotFound(err) { 133 p.logger.Debugf("application %q no longer exists", app) 134 continue 135 } else if err != nil { 136 return errors.Trace(err) 137 } 138 if format > charm.FormatV1 { 139 p.logger.Tracef("application %q is v2, ignoring event", app) 140 continue 141 } 142 143 // Process events for v1 charms. 144 appLife, err := p.provisionerFacade.Life(app) 145 if err != nil && !errors.IsNotFound(err) { 146 return errors.Trace(err) 147 } 148 if err != nil || appLife == life.Dead { 149 p.logger.Debugf("deleting operator for %q", app) 150 if err := p.operatorManager.DeleteOperator(app); err != nil { 151 return errors.Annotatef(err, "failed to stop operator for %q", app) 152 } 153 continue 154 } 155 if appLife != life.Alive { 156 continue 157 } 158 newApps = append(newApps, app) 159 } 160 if len(newApps) == 0 { 161 continue 162 } 163 if err := p.ensureOperators(newApps); err != nil { 164 return errors.Trace(err) 165 } 166 } 167 } 168 } 169 170 func (p *provisioner) charmFormat(appName string) (charm.Format, error) { 171 charmInfo, err := p.provisionerFacade.ApplicationCharmInfo(appName) 172 if err != nil { 173 return charm.FormatUnknown, errors.Annotatef(err, "failed to get charm info for application %q", appName) 174 } 175 return charm.MetaFormat(charmInfo.Charm()), nil 176 } 177 178 func (p *provisioner) waitForOperatorTerminated(app string) error { 179 tryAgain := errors.New("try again") 180 existsFunc := func() error { 181 opState, err := p.operatorManager.OperatorExists(app) 182 if err != nil { 183 return errors.Trace(err) 184 } 185 if !opState.Exists { 186 return nil 187 } 188 if opState.Exists && !opState.Terminating { 189 return errors.Errorf("operator %q should be terminating but is now running", app) 190 } 191 return tryAgain 192 } 193 retryCallArgs := retry.CallArgs{ 194 Attempts: 60, 195 Delay: 3 * time.Second, 196 MaxDuration: 3 * time.Minute, 197 Clock: p.clock, 198 Func: existsFunc, 199 IsFatalError: func(err error) bool { 200 return err != tryAgain 201 }, 202 } 203 return errors.Trace(retry.Call(retryCallArgs)) 204 } 205 206 // ensureOperators creates operator pods for the specified app names -> api passwords. 207 func (p *provisioner) ensureOperators(apps []string) error { 208 var appPasswords []apicaasprovisioner.ApplicationPassword 209 operatorConfig := make([]*caas.OperatorConfig, len(apps)) 210 for i, app := range apps { 211 opState, err := p.operatorManager.OperatorExists(app) 212 if err != nil { 213 return errors.Annotatef(err, "failed to find operator for %q", app) 214 } 215 if opState.Exists && opState.Terminating { 216 // We can't deploy an app while a previous version is terminating. 217 // TODO(caas) - the remove application process should block until app terminated 218 // TODO(caas) - consider making this async, but ok for now as it's a corner case 219 if err := p.waitForOperatorTerminated(app); err != nil { 220 return errors.Annotatef(err, "operator for %q was terminating and there was an error waiting for it to stop", app) 221 } 222 opState.Exists = false 223 } 224 225 op, err := p.operatorManager.Operator(app) 226 if err != nil && !errors.IsNotFound(err) { 227 return errors.Trace(err) 228 } 229 230 // If the operator does not exist already, we need to create an initial 231 // password for it. 232 var password string 233 if !opState.Exists { 234 if password, err = utils.RandomPassword(); err != nil { 235 return errors.Trace(err) 236 } 237 appPasswords = append(appPasswords, apicaasprovisioner.ApplicationPassword{Name: app, Password: password}) 238 } 239 240 var prevCfg caas.OperatorConfig 241 if op != nil && op.Config != nil { 242 prevCfg = *op.Config 243 } 244 config, err := p.updateOperatorConfig(app, password, prevCfg) 245 if err != nil { 246 return errors.Annotatef(err, "failed to generate operator config for %q", app) 247 } 248 operatorConfig[i] = config 249 } 250 // If we did create any passwords for new operators, first they need 251 // to be saved so the agent can login when it starts up. 252 if len(appPasswords) > 0 { 253 errorResults, err := p.provisionerFacade.SetPasswords(appPasswords) 254 if err != nil { 255 return errors.Annotate(err, "failed to set application api passwords") 256 } 257 if err := errorResults.Combine(); err != nil { 258 return errors.Annotate(err, "failed to set application api passwords") 259 } 260 } 261 262 // Now that any new config/passwords are done, create or update 263 // the operators themselves. 264 var errorStrings []string 265 for i, app := range apps { 266 if err := p.ensureOperator(app, operatorConfig[i]); err != nil { 267 errorStrings = append(errorStrings, err.Error()) 268 continue 269 } 270 } 271 if errorStrings != nil { 272 err := errors.New(strings.Join(errorStrings, "\n")) 273 return errors.Annotate(err, "failed to provision all operators") 274 } 275 return nil 276 } 277 278 func (p *provisioner) ensureOperator(app string, config *caas.OperatorConfig) error { 279 if err := p.operatorManager.EnsureOperator(app, p.agentConfig.DataDir(), config); err != nil { 280 return errors.Annotatef(err, "failed to start operator for %q", app) 281 } 282 p.logger.Infof("started operator for application %q", app) 283 return nil 284 } 285 286 func (p *provisioner) updateOperatorConfig(appName, password string, prevCfg caas.OperatorConfig) (*caas.OperatorConfig, error) { 287 info, err := p.provisionerFacade.OperatorProvisioningInfo(appName) 288 if err != nil { 289 return nil, errors.Annotatef(err, "fetching operator provisioning info") 290 } 291 // Operators may have storage configured because charms 292 // have persistent state which must be preserved between any 293 // operator restarts. Newer charms though store state in the controller. 294 if info.CharmStorage != nil && info.CharmStorage.Provider != k8sconstants.StorageProviderType { 295 if spType := info.CharmStorage.Provider; spType == "" { 296 return nil, errors.NotValidf("missing operator storage provider") 297 } else { 298 return nil, errors.NotSupportedf("operator storage provider %q", spType) 299 } 300 } 301 p.logger.Tracef("using operator info %+v", info) 302 303 cfg := &caas.OperatorConfig{ 304 ImageDetails: info.ImageDetails, 305 BaseImageDetails: info.BaseImageDetails, 306 Version: info.Version, 307 ResourceTags: info.Tags, 308 CharmStorage: charmStorageParams(info.CharmStorage), 309 ConfigMapGeneration: prevCfg.ConfigMapGeneration, 310 } 311 312 cfg.AgentConf, err = p.updateAgentConf(appName, password, info, prevCfg.AgentConf) 313 if err != nil { 314 return nil, errors.Annotatef(err, "updating agent config") 315 } 316 317 cfg.OperatorInfo, err = p.updateOperatorInfo(appName, prevCfg.OperatorInfo) 318 if err != nil { 319 return nil, errors.Annotatef(err, "updating operator info") 320 } 321 322 return cfg, nil 323 } 324 325 func (p *provisioner) updateAgentConf(appName, password string, 326 info apicaasprovisioner.OperatorProvisioningInfo, 327 prevAgentConfData []byte) ([]byte, error) { 328 if len(prevAgentConfData) == 0 && password == "" { 329 return nil, errors.NewNotValid(nil, fmt.Sprintf("no existing agent conf found and no new password generated for %q operator", appName)) 330 } 331 if password == "" { 332 // Read password from previous agent config for the existing operator. 333 prevAgentConf, err := agent.ParseConfigData(prevAgentConfData) 334 if err != nil { 335 return nil, errors.Trace(err) 336 } 337 password = prevAgentConf.OldPassword() 338 } 339 340 appTag := names.NewApplicationTag(appName) 341 conf, err := agent.NewAgentConfig( 342 agent.AgentConfigParams{ 343 Paths: agent.Paths{ 344 DataDir: p.agentConfig.DataDir(), 345 LogDir: p.agentConfig.LogDir(), 346 }, 347 Tag: appTag, 348 Controller: p.agentConfig.Controller(), 349 Model: p.modelTag, 350 APIAddresses: info.APIAddresses, 351 CACert: p.agentConfig.CACert(), 352 Password: password, 353 354 // UpgradedToVersion is mandatory but not used by 355 // caas operator agents as they are not upgraded insitu. 356 UpgradedToVersion: info.Version, 357 }, 358 ) 359 if err != nil { 360 return nil, errors.Annotatef(err, "creating new agent config") 361 } 362 newAgentConfData, err := conf.Render() 363 if err != nil { 364 return nil, errors.Annotatef(err, "rendering new agent config") 365 } 366 p.logger.Debugf("agentConfData for %q changed %v", appName, !bytes.Equal(prevAgentConfData, newAgentConfData)) 367 return newAgentConfData, nil 368 } 369 370 func (p *provisioner) updateOperatorInfo(appName string, prevOperatorInfoData []byte) ([]byte, error) { 371 var operatorInfo caas.OperatorInfo 372 if prevOperatorInfoData != nil { 373 prevOperatorInfo, err := caas.UnmarshalOperatorInfo(prevOperatorInfoData) 374 if err != nil { 375 return nil, errors.Annotatef(err, "unmarshalling operator info") 376 } 377 operatorInfo = *prevOperatorInfo 378 } 379 380 if operatorInfo.Cert == "" || 381 operatorInfo.PrivateKey == "" || 382 operatorInfo.CACert == "" { 383 cert, err := p.provisionerFacade.IssueOperatorCertificate(appName) 384 if err != nil { 385 return nil, errors.Annotatef(err, "issuing certificate") 386 } 387 operatorInfo.Cert = cert.Cert 388 operatorInfo.PrivateKey = cert.PrivateKey 389 operatorInfo.CACert = cert.CACert 390 } 391 392 return operatorInfo.Marshal() 393 } 394 395 func charmStorageParams(in *storage.KubernetesFilesystemParams) *caas.CharmStorageParams { 396 if in == nil { 397 return nil 398 } 399 return &caas.CharmStorageParams{ 400 Provider: in.Provider, 401 Size: in.Size, 402 Attributes: in.Attributes, 403 ResourceTags: in.ResourceTags, 404 } 405 }