github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/caasoperatorprovisioner/worker.go (about)

     1  // Copyright 2017 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // This worker is responsible for watching the life cycle of CAAS pod-spec
     5  // applications and creating their operator pods (or removing them). Unlike
     6  // the caasapplicationprovisioner worker, this worker does not create a new
     7  // child worker for every application being monitored.
     8  //
     9  // Note that the separate caasapplicationprovisioner worker handles CAAS
    10  // sidecar applications.
    11  
    12  package caasoperatorprovisioner
    13  
    14  import (
    15  	"bytes"
    16  	"fmt"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/juju/charm/v12"
    21  	"github.com/juju/clock"
    22  	"github.com/juju/errors"
    23  	"github.com/juju/names/v5"
    24  	"github.com/juju/retry"
    25  	"github.com/juju/utils/v3"
    26  	"github.com/juju/worker/v3"
    27  	"github.com/juju/worker/v3/catacomb"
    28  
    29  	"github.com/juju/juju/agent"
    30  	charmscommon "github.com/juju/juju/api/common/charms"
    31  	apicaasprovisioner "github.com/juju/juju/api/controller/caasoperatorprovisioner"
    32  	"github.com/juju/juju/caas"
    33  	k8sconstants "github.com/juju/juju/caas/kubernetes/provider/constants"
    34  	"github.com/juju/juju/core/life"
    35  	"github.com/juju/juju/core/watcher"
    36  	"github.com/juju/juju/rpc/params"
    37  	"github.com/juju/juju/storage"
    38  )
    39  
// logger is here to stop the desire of creating a package level logger.
// Because this type already owns the identifier, a package level
// `var logger = ...` declaration would clash with it.
// Don't do this, instead use the one passed as manifold config.
type logger interface{}

// Reference the type once; presumably this keeps the otherwise unused
// declaration from being flagged by linters.
var _ logger = struct{}{}
    45  
// CAASProvisionerFacade exposes CAAS provisioning functionality to a worker.
type CAASProvisionerFacade interface {
	// OperatorProvisioningInfo returns the provisioning info for the named
	// application's operator. The worker reads the image details, version,
	// tags, API addresses and charm storage from it.
	OperatorProvisioningInfo(string) (apicaasprovisioner.OperatorProvisioningInfo, error)
	// WatchApplications returns a watcher whose change events carry the
	// names of applications the worker should (re)examine.
	WatchApplications() (watcher.StringsWatcher, error)
	// SetPasswords records the supplied application API passwords.
	// Per-entry failures are reported in the returned ErrorResults.
	SetPasswords([]apicaasprovisioner.ApplicationPassword) (params.ErrorResults, error)
	// Life returns the life value of the named application.
	Life(string) (life.Value, error)
	// IssueOperatorCertificate returns a certificate (cert, private key and
	// CA cert) for the named application's operator.
	IssueOperatorCertificate(string) (apicaasprovisioner.OperatorCertificate, error)
	// ApplicationCharmInfo returns the charm info for the named application;
	// the worker uses it to determine the charm format.
	ApplicationCharmInfo(appName string) (*charmscommon.CharmInfo, error)
}
    55  
// Config defines the operation of a Worker.
type Config struct {
	// Facade is the controller API used to watch applications and fetch
	// or store operator provisioning data.
	Facade CAASProvisionerFacade
	// OperatorManager queries, creates/updates and deletes operators.
	OperatorManager caas.ApplicationOperatorManager
	// ModelTag is the model recorded in generated operator agent configs.
	ModelTag names.ModelTag
	// AgentConfig supplies the data dir, log dir, controller tag and CA
	// cert copied into generated operator agent configs.
	AgentConfig agent.Config
	// Clock drives the retry delays used while waiting for a terminating
	// operator to go away.
	Clock clock.Clock
	// Logger receives the worker's log output.
	Logger Logger
}
    65  
    66  // NewProvisionerWorker starts and returns a new CAAS provisioner worker.
    67  func NewProvisionerWorker(config Config) (worker.Worker, error) {
    68  	p := &provisioner{
    69  		provisionerFacade: config.Facade,
    70  		operatorManager:   config.OperatorManager,
    71  		modelTag:          config.ModelTag,
    72  		agentConfig:       config.AgentConfig,
    73  		clock:             config.Clock,
    74  		logger:            config.Logger,
    75  	}
    76  	err := catacomb.Invoke(catacomb.Plan{
    77  		Site: &p.catacomb,
    78  		Work: p.loop,
    79  	})
    80  	return p, err
    81  }
    82  
// provisioner is the worker implementation. It watches applications via
// the facade and creates, updates or deletes their operators through the
// operator manager.
type provisioner struct {
	// catacomb manages the lifetime of the loop and the application watcher.
	catacomb          catacomb.Catacomb
	provisionerFacade CAASProvisionerFacade
	operatorManager   caas.ApplicationOperatorManager
	// clock is handed to retry.Call when waiting for operator termination.
	clock  clock.Clock
	logger Logger

	// modelTag and agentConfig feed the agent config generated for each
	// operator.
	modelTag    names.ModelTag
	agentConfig agent.Config
}
    93  
// Kill is part of the worker.Worker interface.
// It kills the worker's catacomb with a nil error.
func (p *provisioner) Kill() {
	p.catacomb.Kill(nil)
}
    98  
// Wait is part of the worker.Worker interface.
// It returns whatever the worker's catacomb reports from its own Wait.
func (p *provisioner) Wait() error {
	return p.catacomb.Wait()
}
   103  
   104  func (p *provisioner) loop() error {
   105  	// TODO(caas) -  this loop should also keep an eye on kubernetes and ensure
   106  	// that the operator stays up, redeploying it if the pod goes
   107  	// away. For some runtimes we *could* rely on the the runtime's
   108  	// features to do this.
   109  
   110  	appWatcher, err := p.provisionerFacade.WatchApplications()
   111  	if err != nil {
   112  		return errors.Trace(err)
   113  	}
   114  	if err := p.catacomb.Add(appWatcher); err != nil {
   115  		return errors.Trace(err)
   116  	}
   117  
   118  	for {
   119  		select {
   120  		case <-p.catacomb.Dying():
   121  			return p.catacomb.ErrDying()
   122  
   123  		// CAAS applications changed so either create or remove pods as appropriate.
   124  		case apps, ok := <-appWatcher.Changes():
   125  			if !ok {
   126  				return errors.New("app watcher closed channel")
   127  			}
   128  			var newApps []string
   129  			for _, app := range apps {
   130  				// Ignore events for v2 charms.
   131  				format, err := p.charmFormat(app)
   132  				if errors.IsNotFound(err) {
   133  					p.logger.Debugf("application %q no longer exists", app)
   134  					continue
   135  				} else if err != nil {
   136  					return errors.Trace(err)
   137  				}
   138  				if format > charm.FormatV1 {
   139  					p.logger.Tracef("application %q is v2, ignoring event", app)
   140  					continue
   141  				}
   142  
   143  				// Process events for v1 charms.
   144  				appLife, err := p.provisionerFacade.Life(app)
   145  				if err != nil && !errors.IsNotFound(err) {
   146  					return errors.Trace(err)
   147  				}
   148  				if err != nil || appLife == life.Dead {
   149  					p.logger.Debugf("deleting operator for %q", app)
   150  					if err := p.operatorManager.DeleteOperator(app); err != nil {
   151  						return errors.Annotatef(err, "failed to stop operator for %q", app)
   152  					}
   153  					continue
   154  				}
   155  				if appLife != life.Alive {
   156  					continue
   157  				}
   158  				newApps = append(newApps, app)
   159  			}
   160  			if len(newApps) == 0 {
   161  				continue
   162  			}
   163  			if err := p.ensureOperators(newApps); err != nil {
   164  				return errors.Trace(err)
   165  			}
   166  		}
   167  	}
   168  }
   169  
   170  func (p *provisioner) charmFormat(appName string) (charm.Format, error) {
   171  	charmInfo, err := p.provisionerFacade.ApplicationCharmInfo(appName)
   172  	if err != nil {
   173  		return charm.FormatUnknown, errors.Annotatef(err, "failed to get charm info for application %q", appName)
   174  	}
   175  	return charm.MetaFormat(charmInfo.Charm()), nil
   176  }
   177  
   178  func (p *provisioner) waitForOperatorTerminated(app string) error {
   179  	tryAgain := errors.New("try again")
   180  	existsFunc := func() error {
   181  		opState, err := p.operatorManager.OperatorExists(app)
   182  		if err != nil {
   183  			return errors.Trace(err)
   184  		}
   185  		if !opState.Exists {
   186  			return nil
   187  		}
   188  		if opState.Exists && !opState.Terminating {
   189  			return errors.Errorf("operator %q should be terminating but is now running", app)
   190  		}
   191  		return tryAgain
   192  	}
   193  	retryCallArgs := retry.CallArgs{
   194  		Attempts:    60,
   195  		Delay:       3 * time.Second,
   196  		MaxDuration: 3 * time.Minute,
   197  		Clock:       p.clock,
   198  		Func:        existsFunc,
   199  		IsFatalError: func(err error) bool {
   200  			return err != tryAgain
   201  		},
   202  	}
   203  	return errors.Trace(retry.Call(retryCallArgs))
   204  }
   205  
   206  // ensureOperators creates operator pods for the specified app names -> api passwords.
   207  func (p *provisioner) ensureOperators(apps []string) error {
   208  	var appPasswords []apicaasprovisioner.ApplicationPassword
   209  	operatorConfig := make([]*caas.OperatorConfig, len(apps))
   210  	for i, app := range apps {
   211  		opState, err := p.operatorManager.OperatorExists(app)
   212  		if err != nil {
   213  			return errors.Annotatef(err, "failed to find operator for %q", app)
   214  		}
   215  		if opState.Exists && opState.Terminating {
   216  			// We can't deploy an app while a previous version is terminating.
   217  			// TODO(caas) - the remove application process should block until app terminated
   218  			// TODO(caas) - consider making this async, but ok for now as it's a corner case
   219  			if err := p.waitForOperatorTerminated(app); err != nil {
   220  				return errors.Annotatef(err, "operator for %q was terminating and there was an error waiting for it to stop", app)
   221  			}
   222  			opState.Exists = false
   223  		}
   224  
   225  		op, err := p.operatorManager.Operator(app)
   226  		if err != nil && !errors.IsNotFound(err) {
   227  			return errors.Trace(err)
   228  		}
   229  
   230  		// If the operator does not exist already, we need to create an initial
   231  		// password for it.
   232  		var password string
   233  		if !opState.Exists {
   234  			if password, err = utils.RandomPassword(); err != nil {
   235  				return errors.Trace(err)
   236  			}
   237  			appPasswords = append(appPasswords, apicaasprovisioner.ApplicationPassword{Name: app, Password: password})
   238  		}
   239  
   240  		var prevCfg caas.OperatorConfig
   241  		if op != nil && op.Config != nil {
   242  			prevCfg = *op.Config
   243  		}
   244  		config, err := p.updateOperatorConfig(app, password, prevCfg)
   245  		if err != nil {
   246  			return errors.Annotatef(err, "failed to generate operator config for %q", app)
   247  		}
   248  		operatorConfig[i] = config
   249  	}
   250  	// If we did create any passwords for new operators, first they need
   251  	// to be saved so the agent can login when it starts up.
   252  	if len(appPasswords) > 0 {
   253  		errorResults, err := p.provisionerFacade.SetPasswords(appPasswords)
   254  		if err != nil {
   255  			return errors.Annotate(err, "failed to set application api passwords")
   256  		}
   257  		if err := errorResults.Combine(); err != nil {
   258  			return errors.Annotate(err, "failed to set application api passwords")
   259  		}
   260  	}
   261  
   262  	// Now that any new config/passwords are done, create or update
   263  	// the operators themselves.
   264  	var errorStrings []string
   265  	for i, app := range apps {
   266  		if err := p.ensureOperator(app, operatorConfig[i]); err != nil {
   267  			errorStrings = append(errorStrings, err.Error())
   268  			continue
   269  		}
   270  	}
   271  	if errorStrings != nil {
   272  		err := errors.New(strings.Join(errorStrings, "\n"))
   273  		return errors.Annotate(err, "failed to provision all operators")
   274  	}
   275  	return nil
   276  }
   277  
   278  func (p *provisioner) ensureOperator(app string, config *caas.OperatorConfig) error {
   279  	if err := p.operatorManager.EnsureOperator(app, p.agentConfig.DataDir(), config); err != nil {
   280  		return errors.Annotatef(err, "failed to start operator for %q", app)
   281  	}
   282  	p.logger.Infof("started operator for application %q", app)
   283  	return nil
   284  }
   285  
   286  func (p *provisioner) updateOperatorConfig(appName, password string, prevCfg caas.OperatorConfig) (*caas.OperatorConfig, error) {
   287  	info, err := p.provisionerFacade.OperatorProvisioningInfo(appName)
   288  	if err != nil {
   289  		return nil, errors.Annotatef(err, "fetching operator provisioning info")
   290  	}
   291  	// Operators may have storage configured because charms
   292  	// have persistent state which must be preserved between any
   293  	// operator restarts. Newer charms though store state in the controller.
   294  	if info.CharmStorage != nil && info.CharmStorage.Provider != k8sconstants.StorageProviderType {
   295  		if spType := info.CharmStorage.Provider; spType == "" {
   296  			return nil, errors.NotValidf("missing operator storage provider")
   297  		} else {
   298  			return nil, errors.NotSupportedf("operator storage provider %q", spType)
   299  		}
   300  	}
   301  	p.logger.Tracef("using operator info %+v", info)
   302  
   303  	cfg := &caas.OperatorConfig{
   304  		ImageDetails:        info.ImageDetails,
   305  		BaseImageDetails:    info.BaseImageDetails,
   306  		Version:             info.Version,
   307  		ResourceTags:        info.Tags,
   308  		CharmStorage:        charmStorageParams(info.CharmStorage),
   309  		ConfigMapGeneration: prevCfg.ConfigMapGeneration,
   310  	}
   311  
   312  	cfg.AgentConf, err = p.updateAgentConf(appName, password, info, prevCfg.AgentConf)
   313  	if err != nil {
   314  		return nil, errors.Annotatef(err, "updating agent config")
   315  	}
   316  
   317  	cfg.OperatorInfo, err = p.updateOperatorInfo(appName, prevCfg.OperatorInfo)
   318  	if err != nil {
   319  		return nil, errors.Annotatef(err, "updating operator info")
   320  	}
   321  
   322  	return cfg, nil
   323  }
   324  
   325  func (p *provisioner) updateAgentConf(appName, password string,
   326  	info apicaasprovisioner.OperatorProvisioningInfo,
   327  	prevAgentConfData []byte) ([]byte, error) {
   328  	if len(prevAgentConfData) == 0 && password == "" {
   329  		return nil, errors.NewNotValid(nil, fmt.Sprintf("no existing agent conf found and no new password generated for %q operator", appName))
   330  	}
   331  	if password == "" {
   332  		// Read password from previous agent config for the existing operator.
   333  		prevAgentConf, err := agent.ParseConfigData(prevAgentConfData)
   334  		if err != nil {
   335  			return nil, errors.Trace(err)
   336  		}
   337  		password = prevAgentConf.OldPassword()
   338  	}
   339  
   340  	appTag := names.NewApplicationTag(appName)
   341  	conf, err := agent.NewAgentConfig(
   342  		agent.AgentConfigParams{
   343  			Paths: agent.Paths{
   344  				DataDir: p.agentConfig.DataDir(),
   345  				LogDir:  p.agentConfig.LogDir(),
   346  			},
   347  			Tag:          appTag,
   348  			Controller:   p.agentConfig.Controller(),
   349  			Model:        p.modelTag,
   350  			APIAddresses: info.APIAddresses,
   351  			CACert:       p.agentConfig.CACert(),
   352  			Password:     password,
   353  
   354  			// UpgradedToVersion is mandatory but not used by
   355  			// caas operator agents as they are not upgraded insitu.
   356  			UpgradedToVersion: info.Version,
   357  		},
   358  	)
   359  	if err != nil {
   360  		return nil, errors.Annotatef(err, "creating new agent config")
   361  	}
   362  	newAgentConfData, err := conf.Render()
   363  	if err != nil {
   364  		return nil, errors.Annotatef(err, "rendering new agent config")
   365  	}
   366  	p.logger.Debugf("agentConfData for %q changed %v", appName, !bytes.Equal(prevAgentConfData, newAgentConfData))
   367  	return newAgentConfData, nil
   368  }
   369  
   370  func (p *provisioner) updateOperatorInfo(appName string, prevOperatorInfoData []byte) ([]byte, error) {
   371  	var operatorInfo caas.OperatorInfo
   372  	if prevOperatorInfoData != nil {
   373  		prevOperatorInfo, err := caas.UnmarshalOperatorInfo(prevOperatorInfoData)
   374  		if err != nil {
   375  			return nil, errors.Annotatef(err, "unmarshalling operator info")
   376  		}
   377  		operatorInfo = *prevOperatorInfo
   378  	}
   379  
   380  	if operatorInfo.Cert == "" ||
   381  		operatorInfo.PrivateKey == "" ||
   382  		operatorInfo.CACert == "" {
   383  		cert, err := p.provisionerFacade.IssueOperatorCertificate(appName)
   384  		if err != nil {
   385  			return nil, errors.Annotatef(err, "issuing certificate")
   386  		}
   387  		operatorInfo.Cert = cert.Cert
   388  		operatorInfo.PrivateKey = cert.PrivateKey
   389  		operatorInfo.CACert = cert.CACert
   390  	}
   391  
   392  	return operatorInfo.Marshal()
   393  }
   394  
   395  func charmStorageParams(in *storage.KubernetesFilesystemParams) *caas.CharmStorageParams {
   396  	if in == nil {
   397  		return nil
   398  	}
   399  	return &caas.CharmStorageParams{
   400  		Provider:     in.Provider,
   401  		Size:         in.Size,
   402  		Attributes:   in.Attributes,
   403  		ResourceTags: in.ResourceTags,
   404  	}
   405  }