github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/deployer/unit_agent.go (about) 1 // Copyright 2020 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package deployer 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/juju/clock" 11 "github.com/juju/errors" 12 "github.com/juju/loggo" 13 "github.com/juju/lumberjack/v2" 14 "github.com/juju/names/v5" 15 "github.com/juju/utils/v3/voyeur" 16 "github.com/juju/version/v2" 17 "github.com/juju/worker/v3" 18 "github.com/juju/worker/v3/dependency" 19 "github.com/prometheus/client_golang/prometheus" 20 21 "github.com/juju/juju/agent" 22 "github.com/juju/juju/agent/addons" 23 "github.com/juju/juju/agent/tools" 24 "github.com/juju/juju/api/agent/uniter" 25 "github.com/juju/juju/api/base" 26 "github.com/juju/juju/core/arch" 27 "github.com/juju/juju/core/machinelock" 28 coreos "github.com/juju/juju/core/os" 29 "github.com/juju/juju/core/paths" 30 jujuversion "github.com/juju/juju/version" 31 "github.com/juju/juju/worker/introspection" 32 "github.com/juju/juju/worker/logsender" 33 ) 34 35 // UnitAgent wraps the agent config for this unit. 36 type UnitAgent struct { 37 tag names.UnitTag 38 name string 39 clock clock.Clock 40 logger Logger 41 42 mu sync.Mutex 43 agentConf agent.ConfigSetterWriter 44 configChangedVal *voyeur.Value 45 46 setupLogging func(*loggo.Context, agent.Config) 47 unitEngineConfig func() dependency.EngineConfig 48 unitManifolds func(UnitManifoldsConfig) dependency.Manifolds 49 prometheusRegistry *prometheus.Registry 50 51 // Able to disable running units. 52 workerRunning bool 53 } 54 55 // UnitAgentConfig is a params struct with the values necessary to 56 // construct a working unit agent. 57 type UnitAgentConfig struct { 58 Name string 59 DataDir string 60 Clock clock.Clock 61 Logger Logger 62 UnitEngineConfig func() dependency.EngineConfig 63 UnitManifolds func(UnitManifoldsConfig) dependency.Manifolds 64 SetupLogging func(*loggo.Context, agent.Config) 65 } 66 67 // Validate ensures all the required values are set. 68 func (u *UnitAgentConfig) Validate() error { 69 if u.Name == "" { 70 return errors.NotValidf("missing Name") 71 } 72 if u.DataDir == "" { 73 return errors.NotValidf("missing DataDir") 74 } 75 if u.Clock == nil { 76 return errors.NotValidf("missing Clock") 77 } 78 if u.Logger == nil { 79 return errors.NotValidf("missing Logger") 80 } 81 if u.SetupLogging == nil { 82 return errors.NotValidf("missing SetupLogging") 83 } 84 if u.UnitEngineConfig == nil { 85 return errors.NotValidf("missing UnitEngineConfig") 86 } 87 if u.UnitManifolds == nil { 88 return errors.NotValidf("missing UnitManifolds") 89 } 90 return nil 91 } 92 93 // NewUnitAgent constructs an "agent" that is responsible for 94 // defining the workers for the unit and wraps access and updates 95 // to the agent.conf file for the unit. The method expects that there 96 // is an agent.conf file written in the <datadir>/agents/unit-<name> 97 // directory. It would be good to remove this need moving forwards 98 // and have unit agent logging overrides allowable in the machine 99 // agent config file. 100 func NewUnitAgent(config UnitAgentConfig) (*UnitAgent, error) { 101 if err := config.Validate(); err != nil { 102 return nil, errors.Trace(err) 103 } 104 105 // Create a symlink for the unit "agent" binaries. 106 // This is used because the uniter is still using the tools directory 107 // for the unit agent for creating the jujuc symlinks. 108 config.Logger.Tracef("creating symlink for %q to tools directory for jujuc", config.Name) 109 current := version.Binary{ 110 Number: jujuversion.Current, 111 Arch: arch.HostArch(), 112 Release: coreos.HostOSTypeName(), 113 } 114 tag := names.NewUnitTag(config.Name) 115 toolsDir := tools.ToolsDir(config.DataDir, tag.String()) 116 _, err := tools.ChangeAgentTools(config.DataDir, tag.String(), current) 117 defer removeOnErr(&err, config.Logger, toolsDir) 118 if err != nil { 119 // Any error here is indicative of a disk issue, potentially out of 120 // space or inodes. Either way, bouncing the deployer and having the 121 // exponential backoff enter play is the right decision. 122 return nil, errors.Trace(err) 123 } 124 125 config.Logger.Infof("creating new agent config for %q", config.Name) 126 conf, err := agent.ReadConfig(agent.ConfigPath(config.DataDir, tag)) 127 if err != nil { 128 return nil, errors.Trace(err) 129 } 130 prometheusRegistry, err := addons.NewPrometheusRegistry() 131 if err != nil { 132 return nil, errors.Trace(err) 133 } 134 unit := &UnitAgent{ 135 tag: tag, 136 name: config.Name, 137 clock: config.Clock, 138 logger: config.Logger, 139 agentConf: conf, 140 configChangedVal: voyeur.NewValue(true), 141 setupLogging: config.SetupLogging, 142 unitEngineConfig: config.UnitEngineConfig, 143 unitManifolds: config.UnitManifolds, 144 prometheusRegistry: prometheusRegistry, 145 } 146 // Update the 'upgradedToVersion' in the agent.conf file if it is 147 // different to the current version. 148 if conf.UpgradedToVersion() != jujuversion.Current { 149 if err := unit.ChangeConfig(func(setter agent.ConfigSetter) error { 150 setter.SetUpgradedToVersion(jujuversion.Current) 151 return nil 152 }); err != nil { 153 return nil, errors.Trace(err) 154 } 155 } 156 return unit, nil 157 } 158 159 func (a *UnitAgent) start() (worker.Worker, error) { 160 a.logger.Tracef("starting workers for %q", a.name) 161 loggingContext, bufferedLogger, closeLogging, err := a.initLogging() 162 if err != nil { 163 a.logger.Tracef("init logging failed %s", err) 164 return nil, errors.Trace(err) 165 } 166 167 updateAgentConfLogging := func(loggingConfig string) error { 168 return a.ChangeConfig(func(setter agent.ConfigSetter) error { 169 setter.SetLoggingConfig(loggingConfig) 170 return nil 171 }) 172 } 173 174 machineLock, err := machinelock.New(machinelock.Config{ 175 AgentName: a.tag.String(), 176 Clock: a.clock, 177 Logger: loggingContext.GetLogger("juju.machinelock"), 178 LogFilename: agent.MachineLockLogFilename(a.agentConf), 179 }) 180 // There will only be an error if the required configuration 181 // values are not passed in. 182 if err != nil { 183 a.logger.Tracef("creating machine lock failed %s", err) 184 return nil, errors.Trace(err) 185 } 186 187 // construct unit agent manifold 188 a.logger.Tracef("creating unit manifolds for %q", a.name) 189 manifolds := a.unitManifolds(UnitManifoldsConfig{ 190 LoggingContext: loggingContext, 191 Agent: a, 192 LogSource: bufferedLogger.Logs(), 193 LeadershipGuarantee: 30 * time.Second, 194 AgentConfigChanged: a.configChangedVal, 195 ValidateMigration: a.validateMigration, 196 UpdateLoggerConfig: updateAgentConfLogging, 197 MachineLock: machineLock, 198 Clock: a.clock, 199 }) 200 depEngineConfig := a.unitEngineConfig() 201 // TODO: tweak IsFatal error func, maybe? 202 depEngineConfig.Logger = loggingContext.GetLogger("juju.worker.dependency") 203 // Tweak as necessary. 204 engine, err := dependency.NewEngine(depEngineConfig) 205 if err != nil { 206 return nil, err 207 } 208 209 a.logger.Tracef("installing manifolds for %q", a.name) 210 if err := dependency.Install(engine, manifolds); err != nil { 211 if err := worker.Stop(engine); err != nil { 212 a.logger.Errorf("while stopping engine with bad manifolds: %v", err) 213 } 214 return nil, err 215 } 216 a.mu.Lock() 217 a.workerRunning = true 218 a.mu.Unlock() 219 go func() { 220 // Wait for the worker to finish, then mark not running. 221 _ = engine.Wait() 222 a.mu.Lock() 223 a.workerRunning = false 224 closeLogging() 225 a.mu.Unlock() 226 }() 227 if err := addons.StartIntrospection(addons.IntrospectionConfig{ 228 AgentTag: a.CurrentConfig().Tag(), 229 Engine: engine, 230 NewSocketName: addons.DefaultIntrospectionSocketName, 231 PrometheusGatherer: a.prometheusRegistry, 232 MachineLock: machineLock, 233 WorkerFunc: introspection.NewWorker, 234 }); err != nil { 235 // If the introspection worker failed to start, we just log error 236 // but continue. It is very unlikely to happen in the real world 237 // as the only issue is connecting to the abstract domain socket 238 // and the agent is controlled by by the OS to only have one. 239 a.logger.Errorf("failed to start introspection worker: %v", err) 240 } 241 a.logger.Tracef("engine for %q running", a.name) 242 return engine, nil 243 } 244 245 func (a *UnitAgent) running() bool { 246 a.mu.Lock() 247 defer a.mu.Unlock() 248 return a.workerRunning 249 } 250 251 func (a *UnitAgent) initLogging() (*loggo.Context, *logsender.BufferedLogWriter, func(), error) { 252 loggingContext := loggo.NewContext(loggo.INFO) 253 254 logFilename := agent.LogFilename(a.agentConf) 255 if err := paths.PrimeLogFile(logFilename); err != nil { 256 // This isn't a fatal error so log and continue if priming 257 // fails. 258 a.logger.Errorf("unable to prime %s (proceeding anyway): %v", logFilename, err) 259 } 260 ljLogger := &lumberjack.Logger{ 261 Filename: logFilename, // eg: "/var/log/juju/unit-mysql-0.log" 262 MaxSize: a.CurrentConfig().AgentLogfileMaxSizeMB(), 263 MaxBackups: a.CurrentConfig().AgentLogfileMaxBackups(), 264 Compress: true, 265 } 266 a.logger.Debugf("created rotating log file %q with max size %d MB and max backups %d", 267 ljLogger.Filename, ljLogger.MaxSize, ljLogger.MaxBackups) 268 if err := loggingContext.AddWriter( 269 "file", loggo.NewSimpleWriter(ljLogger, loggo.DefaultFormatter)); err != nil { 270 a.logger.Errorf("unable to configure file logging for unit %q: %v", a.name, err) 271 } 272 273 bufferedLogger, err := logsender.InstallBufferedLogWriter(loggingContext, 1048576) 274 if err != nil { 275 return nil, nil, nil, errors.Annotate(err, "unable to add buffered log writer") 276 } 277 278 closeLogging := func() { 279 if _, err = loggingContext.RemoveWriter("file"); err != nil { 280 a.logger.Errorf("%q remove writer: %s", a.name, err) 281 } 282 bufferedLogger.Close() 283 if err = ljLogger.Close(); err != nil { 284 a.logger.Errorf("%q lumberjack logger close: %s", a.name, err) 285 } 286 } 287 288 // Add line for starting agent to logging context. 289 loggingContext.GetLogger("juju").Infof("Starting unit workers for %q", a.name) 290 a.setupLogging(loggingContext, a.agentConf) 291 return loggingContext, bufferedLogger, closeLogging, nil 292 } 293 294 // ChangeConfig modifies this configuration using the given mutator. 295 func (a *UnitAgent) ChangeConfig(change agent.ConfigMutator) error { 296 a.mu.Lock() 297 defer a.mu.Unlock() 298 if err := change(a.agentConf); err != nil { 299 return errors.Trace(err) 300 } 301 if err := a.agentConf.Write(); err != nil { 302 return errors.Annotate(err, "cannot write agent configuration") 303 } 304 a.configChangedVal.Set(true) 305 return nil 306 } 307 308 // CurrentConfig returns the agent config for this agent. 309 func (a *UnitAgent) CurrentConfig() agent.Config { 310 a.mu.Lock() 311 defer a.mu.Unlock() 312 return a.agentConf.Clone() 313 } 314 315 // validateMigration is called by the migrationminion to help check 316 // that the agent will be ok when connected to a new controller. 317 func (a *UnitAgent) validateMigration(apiCaller base.APICaller) error { 318 // TODO(mjs) - more extensive checks to come. 319 facade := uniter.NewState(apiCaller, a.tag) 320 _, err := facade.Unit(a.tag) 321 if err != nil { 322 return errors.Trace(err) 323 } 324 model, err := facade.Model() 325 if err != nil { 326 return errors.Trace(err) 327 } 328 curModelUUID := a.CurrentConfig().Model().Id() 329 newModelUUID := model.UUID 330 if newModelUUID != curModelUUID { 331 return errors.Errorf("model mismatch when validating: got %q, expected %q", 332 newModelUUID, curModelUUID) 333 } 334 return nil 335 }