github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/cmd/jujud/agent/caasoperator.go (about) 1 // Copyright 2017 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package agent 5 6 import ( 7 "io" 8 "os" 9 "path/filepath" 10 "runtime" 11 "time" 12 13 "github.com/juju/clock" 14 "github.com/juju/cmd/v3" 15 "github.com/juju/errors" 16 "github.com/juju/featureflag" 17 "github.com/juju/gnuflag" 18 "github.com/juju/loggo" 19 "github.com/juju/names/v5" 20 "github.com/juju/pubsub/v2" 21 "github.com/juju/utils/v3/voyeur" 22 "github.com/juju/worker/v3" 23 "github.com/juju/worker/v3/dependency" 24 "github.com/prometheus/client_golang/prometheus" 25 26 "github.com/juju/juju/agent" 27 "github.com/juju/juju/agent/addons" 28 apicaasoperator "github.com/juju/juju/api/agent/caasoperator" 29 "github.com/juju/juju/api/base" 30 caasconstants "github.com/juju/juju/caas/kubernetes/provider/constants" 31 jujucmd "github.com/juju/juju/cmd" 32 "github.com/juju/juju/cmd/jujud/agent/agentconf" 33 "github.com/juju/juju/cmd/jujud/agent/caasoperator" 34 "github.com/juju/juju/cmd/jujud/agent/engine" 35 agenterrors "github.com/juju/juju/cmd/jujud/agent/errors" 36 cmdutil "github.com/juju/juju/cmd/jujud/util" 37 "github.com/juju/juju/core/machinelock" 38 "github.com/juju/juju/upgrades" 39 jujuversion "github.com/juju/juju/version" 40 jworker "github.com/juju/juju/worker" 41 "github.com/juju/juju/worker/gate" 42 "github.com/juju/juju/worker/introspection" 43 "github.com/juju/juju/worker/logsender" 44 "github.com/juju/juju/worker/upgradesteps" 45 ) 46 47 var ( 48 // Should be an explicit dependency, can't do it cleanly yet. 49 // Exported for testing. 50 CaasOperatorManifolds = caasoperator.Manifolds 51 ) 52 53 // CaasOperatorAgent is a cmd.Command responsible for running a CAAS operator agent. 54 type CaasOperatorAgent struct { 55 cmd.CommandBase 56 agentconf.AgentConf 57 configChangedVal *voyeur.Value 58 ApplicationName string 59 runner *worker.Runner 60 bufferedLogger *logsender.BufferedLogWriter 61 ctx *cmd.Context 62 dead chan struct{} 63 errReason error 64 machineLock machinelock.Lock 65 66 preUpgradeSteps upgrades.PreUpgradeStepsFunc 67 upgradeComplete gate.Lock 68 69 prometheusRegistry *prometheus.Registry 70 71 configure func(*caasoperator.ManifoldsConfig) error 72 } 73 74 // NewCaasOperatorAgent creates a new CAASOperatorAgent instance properly initialized. 75 func NewCaasOperatorAgent( 76 ctx *cmd.Context, 77 bufferedLogger *logsender.BufferedLogWriter, 78 configure func(*caasoperator.ManifoldsConfig) error, 79 ) (*CaasOperatorAgent, error) { 80 prometheusRegistry, err := addons.NewPrometheusRegistry() 81 if err != nil { 82 return nil, errors.Trace(err) 83 } 84 return &CaasOperatorAgent{ 85 AgentConf: agentconf.NewAgentConf(""), 86 configChangedVal: voyeur.NewValue(true), 87 ctx: ctx, 88 dead: make(chan struct{}), 89 bufferedLogger: bufferedLogger, 90 prometheusRegistry: prometheusRegistry, 91 preUpgradeSteps: upgrades.PreUpgradeSteps, 92 configure: configure, 93 }, nil 94 } 95 96 // Info implements Command. 97 func (op *CaasOperatorAgent) Info() *cmd.Info { 98 return jujucmd.Info(&cmd.Info{ 99 Name: "caasoperator", 100 Purpose: "run a juju CAAS Operator", 101 }) 102 } 103 104 // SetFlags implements Command. 105 func (op *CaasOperatorAgent) SetFlags(f *gnuflag.FlagSet) { 106 op.AgentConf.AddFlags(f) 107 f.StringVar(&op.ApplicationName, "application-name", "", "name of the application") 108 } 109 110 // Init initializes the command for running. 111 func (op *CaasOperatorAgent) Init(args []string) error { 112 if op.ApplicationName == "" { 113 return agenterrors.RequiredError("application-name") 114 } 115 if !names.IsValidApplication(op.ApplicationName) { 116 return errors.Errorf(`--application-name option expects "<application>" argument`) 117 } 118 if err := op.AgentConf.CheckArgs(args); err != nil { 119 return err 120 } 121 op.runner = worker.NewRunner(worker.RunnerParams{ 122 IsFatal: agenterrors.IsFatal, 123 MoreImportant: agenterrors.MoreImportant, 124 RestartDelay: jworker.RestartDelay, 125 Logger: logger, 126 }) 127 return nil 128 } 129 130 // Wait waits for the CaasOperator agent to finish. 131 func (op *CaasOperatorAgent) Wait() error { 132 <-op.dead 133 return op.errReason 134 } 135 136 // Stop implements Worker. 137 func (op *CaasOperatorAgent) Stop() error { 138 op.runner.Kill() 139 return op.Wait() 140 } 141 142 // Done signals the machine agent is finished 143 func (op *CaasOperatorAgent) Done(err error) { 144 op.errReason = err 145 close(op.dead) 146 } 147 148 // maybeCopyAgentConfig copies the read-only agent config template 149 // to the writeable agent config file if the file doesn't yet exist. 150 func (op *CaasOperatorAgent) maybeCopyAgentConfig() error { 151 err := op.ReadConfig(op.Tag().String()) 152 if err == nil { 153 return nil 154 } 155 if !os.IsNotExist(errors.Cause(err)) { 156 logger.Errorf("reading initial agent config file: %v", err) 157 return errors.Trace(err) 158 } 159 templateFile := filepath.Join(agent.Dir(op.DataDir(), op.Tag()), caasconstants.TemplateFileNameAgentConf) 160 if err := copyFile(agent.ConfigPath(op.DataDir(), op.Tag()), templateFile); err != nil { 161 logger.Errorf("copying agent config file template: %v", err) 162 return errors.Trace(err) 163 } 164 return op.ReadConfig(op.Tag().String()) 165 } 166 167 func copyFile(dest, source string) error { 168 df, err := os.OpenFile(dest, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0660) 169 if err != nil { 170 return errors.Trace(err) 171 } 172 defer df.Close() 173 174 f, err := os.Open(source) 175 if err != nil { 176 return errors.Trace(err) 177 } 178 defer f.Close() 179 180 _, err = io.Copy(df, f) 181 return errors.Trace(err) 182 } 183 184 // Run implements Command. 185 func (op *CaasOperatorAgent) Run(ctx *cmd.Context) (err error) { 186 defer op.Done(err) 187 if err := op.maybeCopyAgentConfig(); err != nil { 188 return errors.Annotate(err, "creating agent config from template") 189 } 190 agentConfig := op.CurrentConfig() 191 machineLock, err := machinelock.New(machinelock.Config{ 192 AgentName: op.Tag().String(), 193 Clock: clock.WallClock, 194 Logger: loggo.GetLogger("juju.machinelock"), 195 LogFilename: agent.MachineLockLogFilename(agentConfig), 196 }) 197 // There will only be an error if the required configuration 198 // values are not passed in. 199 if err != nil { 200 return errors.Trace(err) 201 } 202 op.machineLock = machineLock 203 op.upgradeComplete = upgradesteps.NewLock(agentConfig) 204 205 logger.Infof("caas operator %v start (%s [%s])", op.Tag().String(), jujuversion.Current, runtime.Compiler) 206 if flags := featureflag.String(); flags != "" { 207 logger.Warningf("developer feature flags enabled: %s", flags) 208 } 209 210 _ = op.runner.StartWorker("api", op.Workers) 211 return cmdutil.AgentDone(logger, op.runner.Wait()) 212 } 213 214 // Workers returns a dependency.Engine running the operator's responsibilities. 215 func (op *CaasOperatorAgent) Workers() (worker.Worker, error) { 216 updateAgentConfLogging := func(loggingConfig string) error { 217 return op.AgentConf.ChangeConfig(func(setter agent.ConfigSetter) error { 218 setter.SetLoggingConfig(loggingConfig) 219 return nil 220 }) 221 } 222 223 localHub := pubsub.NewSimpleHub(&pubsub.SimpleHubConfig{ 224 Logger: loggo.GetLogger("juju.localhub"), 225 }) 226 agentConfig := op.AgentConf.CurrentConfig() 227 manifoldConfig := caasoperator.ManifoldsConfig{ 228 Agent: agent.APIHostPortsSetter{Agent: op}, 229 AgentConfigChanged: op.configChangedVal, 230 Clock: clock.WallClock, 231 LogSource: op.bufferedLogger.Logs(), 232 UpdateLoggerConfig: updateAgentConfLogging, 233 PrometheusRegisterer: op.prometheusRegistry, 234 LeadershipGuarantee: 15 * time.Second, 235 PreUpgradeSteps: op.preUpgradeSteps, 236 UpgradeStepsLock: op.upgradeComplete, 237 ValidateMigration: op.validateMigration, 238 MachineLock: op.machineLock, 239 PreviousAgentVersion: agentConfig.UpgradedToVersion(), 240 LocalHub: localHub, 241 } 242 if op.configure != nil { 243 if err := op.configure(&manifoldConfig); err != nil { 244 return nil, errors.Trace(err) 245 } 246 } 247 manifolds := CaasOperatorManifolds(manifoldConfig) 248 metrics := engine.NewMetrics() 249 workerMetricsSink := metrics.ForModel(agentConfig.Model()) 250 engine, err := dependency.NewEngine(engine.DependencyEngineConfig(workerMetricsSink)) 251 if err != nil { 252 return nil, err 253 } 254 if err := dependency.Install(engine, manifolds); err != nil { 255 if err := worker.Stop(engine); err != nil { 256 logger.Errorf("while stopping engine with bad manifolds: %v", err) 257 } 258 return nil, err 259 } 260 if err := addons.StartIntrospection(addons.IntrospectionConfig{ 261 AgentTag: op.CurrentConfig().Tag(), 262 Engine: engine, 263 MachineLock: op.machineLock, 264 NewSocketName: addons.DefaultIntrospectionSocketName, 265 PrometheusGatherer: op.prometheusRegistry, 266 WorkerFunc: introspection.NewWorker, 267 Clock: clock.WallClock, 268 LocalHub: localHub, 269 // If the caas operator gains the ability to interact with the 270 // introspection worker, the introspection worker should be configured 271 // with a clock and hub. See the machine agent. 272 }); err != nil { 273 // If the introspection worker failed to start, we just log error 274 // but continue. It is very unlikely to happen in the real world 275 // as the only issue is connecting to the abstract domain socket 276 // and the agent is controlled by by the OS to only have one. 277 logger.Errorf("failed to start introspection worker: %v", err) 278 } 279 if err := addons.RegisterEngineMetrics(op.prometheusRegistry, metrics, engine, workerMetricsSink); err != nil { 280 // If the dependency engine metrics fail, continue on. This is unlikely 281 // to happen in the real world, but should't stop or bring down an 282 // agent. 283 logger.Errorf("failed to start the dependency engine metrics %v", err) 284 } 285 return engine, nil 286 } 287 288 // Tag implements Agent. 289 func (op *CaasOperatorAgent) Tag() names.Tag { 290 return names.NewApplicationTag(op.ApplicationName) 291 } 292 293 // ChangeConfig implements Agent. 294 func (op *CaasOperatorAgent) ChangeConfig(mutate agent.ConfigMutator) error { 295 err := op.AgentConf.ChangeConfig(mutate) 296 op.configChangedVal.Set(true) 297 return errors.Trace(err) 298 } 299 300 // validateMigration is called by the migrationminion to help check 301 // that the agent will be ok when connected to a new controller. 302 func (op *CaasOperatorAgent) validateMigration(apiCaller base.APICaller) error { 303 // TODO(wallyworld) - more extensive checks to come. 304 facade := apicaasoperator.NewClient(apiCaller) 305 _, err := facade.Life(op.ApplicationName) 306 if err != nil { 307 return errors.Trace(err) 308 } 309 model, err := facade.Model() 310 if err != nil { 311 return errors.Trace(err) 312 } 313 curModelUUID := op.CurrentConfig().Model().Id() 314 newModelUUID := model.UUID 315 if newModelUUID != curModelUUID { 316 return errors.Errorf("model mismatch when validating: got %q, expected %q", 317 newModelUUID, curModelUUID) 318 } 319 return nil 320 }