github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/cmd/jujud/agent/caasoperator.go (about)

     1  // Copyright 2017 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package agent
     5  
     6  import (
     7  	"io"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime"
    11  	"time"
    12  
    13  	"github.com/juju/clock"
    14  	"github.com/juju/cmd/v3"
    15  	"github.com/juju/errors"
    16  	"github.com/juju/featureflag"
    17  	"github.com/juju/gnuflag"
    18  	"github.com/juju/loggo"
    19  	"github.com/juju/names/v5"
    20  	"github.com/juju/pubsub/v2"
    21  	"github.com/juju/utils/v3/voyeur"
    22  	"github.com/juju/worker/v3"
    23  	"github.com/juju/worker/v3/dependency"
    24  	"github.com/prometheus/client_golang/prometheus"
    25  
    26  	"github.com/juju/juju/agent"
    27  	"github.com/juju/juju/agent/addons"
    28  	apicaasoperator "github.com/juju/juju/api/agent/caasoperator"
    29  	"github.com/juju/juju/api/base"
    30  	caasconstants "github.com/juju/juju/caas/kubernetes/provider/constants"
    31  	jujucmd "github.com/juju/juju/cmd"
    32  	"github.com/juju/juju/cmd/jujud/agent/agentconf"
    33  	"github.com/juju/juju/cmd/jujud/agent/caasoperator"
    34  	"github.com/juju/juju/cmd/jujud/agent/engine"
    35  	agenterrors "github.com/juju/juju/cmd/jujud/agent/errors"
    36  	cmdutil "github.com/juju/juju/cmd/jujud/util"
    37  	"github.com/juju/juju/core/machinelock"
    38  	"github.com/juju/juju/upgrades"
    39  	jujuversion "github.com/juju/juju/version"
    40  	jworker "github.com/juju/juju/worker"
    41  	"github.com/juju/juju/worker/gate"
    42  	"github.com/juju/juju/worker/introspection"
    43  	"github.com/juju/juju/worker/logsender"
    44  	"github.com/juju/juju/worker/upgradesteps"
    45  )
    46  
    47  var (
    48  	// Should be an explicit dependency, can't do it cleanly yet.
    49  	// Exported for testing.
    50  	CaasOperatorManifolds = caasoperator.Manifolds
    51  )
    52  
// CaasOperatorAgent is a cmd.Command responsible for running a CAAS operator agent.
type CaasOperatorAgent struct {
	cmd.CommandBase
	agentconf.AgentConf
	// configChangedVal is set to true every time ChangeConfig is called
	// and is handed to the manifolds as AgentConfigChanged.
	configChangedVal *voyeur.Value
	// ApplicationName is populated from the --application-name flag and
	// is used to derive the agent's tag.
	ApplicationName string
	// runner is created in Init and runs the "api" worker started by Run.
	runner *worker.Runner
	// bufferedLogger supplies the log source passed to the manifolds.
	bufferedLogger *logsender.BufferedLogWriter
	// ctx is the command context supplied to NewCaasOperatorAgent.
	ctx *cmd.Context
	// dead is closed by Done; Wait blocks until that happens.
	dead chan struct{}
	// errReason is recorded by Done and returned from Wait.
	errReason error
	// machineLock is created in Run and shared with the manifolds and
	// the introspection worker.
	machineLock machinelock.Lock

	// preUpgradeSteps defaults to upgrades.PreUpgradeSteps and is passed
	// through to the manifolds.
	preUpgradeSteps upgrades.PreUpgradeStepsFunc
	// upgradeComplete is created in Run from the current agent config and
	// is passed to the manifolds as the upgrade steps lock.
	upgradeComplete gate.Lock

	// prometheusRegistry is created in NewCaasOperatorAgent and used both
	// as the metrics registerer and as the introspection gatherer.
	prometheusRegistry *prometheus.Registry

	// configure, when non-nil, is called with the manifolds config before
	// the manifolds are built, allowing the caller to adjust it.
	configure func(*caasoperator.ManifoldsConfig) error
}
    73  
    74  // NewCaasOperatorAgent creates a new CAASOperatorAgent instance properly initialized.
    75  func NewCaasOperatorAgent(
    76  	ctx *cmd.Context,
    77  	bufferedLogger *logsender.BufferedLogWriter,
    78  	configure func(*caasoperator.ManifoldsConfig) error,
    79  ) (*CaasOperatorAgent, error) {
    80  	prometheusRegistry, err := addons.NewPrometheusRegistry()
    81  	if err != nil {
    82  		return nil, errors.Trace(err)
    83  	}
    84  	return &CaasOperatorAgent{
    85  		AgentConf:          agentconf.NewAgentConf(""),
    86  		configChangedVal:   voyeur.NewValue(true),
    87  		ctx:                ctx,
    88  		dead:               make(chan struct{}),
    89  		bufferedLogger:     bufferedLogger,
    90  		prometheusRegistry: prometheusRegistry,
    91  		preUpgradeSteps:    upgrades.PreUpgradeSteps,
    92  		configure:          configure,
    93  	}, nil
    94  }
    95  
    96  // Info implements Command.
    97  func (op *CaasOperatorAgent) Info() *cmd.Info {
    98  	return jujucmd.Info(&cmd.Info{
    99  		Name:    "caasoperator",
   100  		Purpose: "run a juju CAAS Operator",
   101  	})
   102  }
   103  
   104  // SetFlags implements Command.
   105  func (op *CaasOperatorAgent) SetFlags(f *gnuflag.FlagSet) {
   106  	op.AgentConf.AddFlags(f)
   107  	f.StringVar(&op.ApplicationName, "application-name", "", "name of the application")
   108  }
   109  
   110  // Init initializes the command for running.
   111  func (op *CaasOperatorAgent) Init(args []string) error {
   112  	if op.ApplicationName == "" {
   113  		return agenterrors.RequiredError("application-name")
   114  	}
   115  	if !names.IsValidApplication(op.ApplicationName) {
   116  		return errors.Errorf(`--application-name option expects "<application>" argument`)
   117  	}
   118  	if err := op.AgentConf.CheckArgs(args); err != nil {
   119  		return err
   120  	}
   121  	op.runner = worker.NewRunner(worker.RunnerParams{
   122  		IsFatal:       agenterrors.IsFatal,
   123  		MoreImportant: agenterrors.MoreImportant,
   124  		RestartDelay:  jworker.RestartDelay,
   125  		Logger:        logger,
   126  	})
   127  	return nil
   128  }
   129  
   130  // Wait waits for the CaasOperator agent to finish.
   131  func (op *CaasOperatorAgent) Wait() error {
   132  	<-op.dead
   133  	return op.errReason
   134  }
   135  
   136  // Stop implements Worker.
   137  func (op *CaasOperatorAgent) Stop() error {
   138  	op.runner.Kill()
   139  	return op.Wait()
   140  }
   141  
   142  // Done signals the machine agent is finished
   143  func (op *CaasOperatorAgent) Done(err error) {
   144  	op.errReason = err
   145  	close(op.dead)
   146  }
   147  
   148  // maybeCopyAgentConfig copies the read-only agent config template
   149  // to the writeable agent config file if the file doesn't yet exist.
   150  func (op *CaasOperatorAgent) maybeCopyAgentConfig() error {
   151  	err := op.ReadConfig(op.Tag().String())
   152  	if err == nil {
   153  		return nil
   154  	}
   155  	if !os.IsNotExist(errors.Cause(err)) {
   156  		logger.Errorf("reading initial agent config file: %v", err)
   157  		return errors.Trace(err)
   158  	}
   159  	templateFile := filepath.Join(agent.Dir(op.DataDir(), op.Tag()), caasconstants.TemplateFileNameAgentConf)
   160  	if err := copyFile(agent.ConfigPath(op.DataDir(), op.Tag()), templateFile); err != nil {
   161  		logger.Errorf("copying agent config file template: %v", err)
   162  		return errors.Trace(err)
   163  	}
   164  	return op.ReadConfig(op.Tag().String())
   165  }
   166  
   167  func copyFile(dest, source string) error {
   168  	df, err := os.OpenFile(dest, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0660)
   169  	if err != nil {
   170  		return errors.Trace(err)
   171  	}
   172  	defer df.Close()
   173  
   174  	f, err := os.Open(source)
   175  	if err != nil {
   176  		return errors.Trace(err)
   177  	}
   178  	defer f.Close()
   179  
   180  	_, err = io.Copy(df, f)
   181  	return errors.Trace(err)
   182  }
   183  
   184  // Run implements Command.
   185  func (op *CaasOperatorAgent) Run(ctx *cmd.Context) (err error) {
   186  	defer op.Done(err)
   187  	if err := op.maybeCopyAgentConfig(); err != nil {
   188  		return errors.Annotate(err, "creating agent config from template")
   189  	}
   190  	agentConfig := op.CurrentConfig()
   191  	machineLock, err := machinelock.New(machinelock.Config{
   192  		AgentName:   op.Tag().String(),
   193  		Clock:       clock.WallClock,
   194  		Logger:      loggo.GetLogger("juju.machinelock"),
   195  		LogFilename: agent.MachineLockLogFilename(agentConfig),
   196  	})
   197  	// There will only be an error if the required configuration
   198  	// values are not passed in.
   199  	if err != nil {
   200  		return errors.Trace(err)
   201  	}
   202  	op.machineLock = machineLock
   203  	op.upgradeComplete = upgradesteps.NewLock(agentConfig)
   204  
   205  	logger.Infof("caas operator %v start (%s [%s])", op.Tag().String(), jujuversion.Current, runtime.Compiler)
   206  	if flags := featureflag.String(); flags != "" {
   207  		logger.Warningf("developer feature flags enabled: %s", flags)
   208  	}
   209  
   210  	_ = op.runner.StartWorker("api", op.Workers)
   211  	return cmdutil.AgentDone(logger, op.runner.Wait())
   212  }
   213  
   214  // Workers returns a dependency.Engine running the operator's responsibilities.
   215  func (op *CaasOperatorAgent) Workers() (worker.Worker, error) {
   216  	updateAgentConfLogging := func(loggingConfig string) error {
   217  		return op.AgentConf.ChangeConfig(func(setter agent.ConfigSetter) error {
   218  			setter.SetLoggingConfig(loggingConfig)
   219  			return nil
   220  		})
   221  	}
   222  
   223  	localHub := pubsub.NewSimpleHub(&pubsub.SimpleHubConfig{
   224  		Logger: loggo.GetLogger("juju.localhub"),
   225  	})
   226  	agentConfig := op.AgentConf.CurrentConfig()
   227  	manifoldConfig := caasoperator.ManifoldsConfig{
   228  		Agent:                agent.APIHostPortsSetter{Agent: op},
   229  		AgentConfigChanged:   op.configChangedVal,
   230  		Clock:                clock.WallClock,
   231  		LogSource:            op.bufferedLogger.Logs(),
   232  		UpdateLoggerConfig:   updateAgentConfLogging,
   233  		PrometheusRegisterer: op.prometheusRegistry,
   234  		LeadershipGuarantee:  15 * time.Second,
   235  		PreUpgradeSteps:      op.preUpgradeSteps,
   236  		UpgradeStepsLock:     op.upgradeComplete,
   237  		ValidateMigration:    op.validateMigration,
   238  		MachineLock:          op.machineLock,
   239  		PreviousAgentVersion: agentConfig.UpgradedToVersion(),
   240  		LocalHub:             localHub,
   241  	}
   242  	if op.configure != nil {
   243  		if err := op.configure(&manifoldConfig); err != nil {
   244  			return nil, errors.Trace(err)
   245  		}
   246  	}
   247  	manifolds := CaasOperatorManifolds(manifoldConfig)
   248  	metrics := engine.NewMetrics()
   249  	workerMetricsSink := metrics.ForModel(agentConfig.Model())
   250  	engine, err := dependency.NewEngine(engine.DependencyEngineConfig(workerMetricsSink))
   251  	if err != nil {
   252  		return nil, err
   253  	}
   254  	if err := dependency.Install(engine, manifolds); err != nil {
   255  		if err := worker.Stop(engine); err != nil {
   256  			logger.Errorf("while stopping engine with bad manifolds: %v", err)
   257  		}
   258  		return nil, err
   259  	}
   260  	if err := addons.StartIntrospection(addons.IntrospectionConfig{
   261  		AgentTag:           op.CurrentConfig().Tag(),
   262  		Engine:             engine,
   263  		MachineLock:        op.machineLock,
   264  		NewSocketName:      addons.DefaultIntrospectionSocketName,
   265  		PrometheusGatherer: op.prometheusRegistry,
   266  		WorkerFunc:         introspection.NewWorker,
   267  		Clock:              clock.WallClock,
   268  		LocalHub:           localHub,
   269  		// If the caas operator gains the ability to interact with the
   270  		// introspection worker, the introspection worker should be configured
   271  		// with a clock and hub. See the machine agent.
   272  	}); err != nil {
   273  		// If the introspection worker failed to start, we just log error
   274  		// but continue. It is very unlikely to happen in the real world
   275  		// as the only issue is connecting to the abstract domain socket
   276  		// and the agent is controlled by by the OS to only have one.
   277  		logger.Errorf("failed to start introspection worker: %v", err)
   278  	}
   279  	if err := addons.RegisterEngineMetrics(op.prometheusRegistry, metrics, engine, workerMetricsSink); err != nil {
   280  		// If the dependency engine metrics fail, continue on. This is unlikely
   281  		// to happen in the real world, but should't stop or bring down an
   282  		// agent.
   283  		logger.Errorf("failed to start the dependency engine metrics %v", err)
   284  	}
   285  	return engine, nil
   286  }
   287  
   288  // Tag implements Agent.
   289  func (op *CaasOperatorAgent) Tag() names.Tag {
   290  	return names.NewApplicationTag(op.ApplicationName)
   291  }
   292  
   293  // ChangeConfig implements Agent.
   294  func (op *CaasOperatorAgent) ChangeConfig(mutate agent.ConfigMutator) error {
   295  	err := op.AgentConf.ChangeConfig(mutate)
   296  	op.configChangedVal.Set(true)
   297  	return errors.Trace(err)
   298  }
   299  
   300  // validateMigration is called by the migrationminion to help check
   301  // that the agent will be ok when connected to a new controller.
   302  func (op *CaasOperatorAgent) validateMigration(apiCaller base.APICaller) error {
   303  	// TODO(wallyworld) - more extensive checks to come.
   304  	facade := apicaasoperator.NewClient(apiCaller)
   305  	_, err := facade.Life(op.ApplicationName)
   306  	if err != nil {
   307  		return errors.Trace(err)
   308  	}
   309  	model, err := facade.Model()
   310  	if err != nil {
   311  		return errors.Trace(err)
   312  	}
   313  	curModelUUID := op.CurrentConfig().Model().Id()
   314  	newModelUUID := model.UUID
   315  	if newModelUUID != curModelUUID {
   316  		return errors.Errorf("model mismatch when validating: got %q, expected %q",
   317  			newModelUUID, curModelUUID)
   318  	}
   319  	return nil
   320  }