github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/deployer/unit_agent.go (about)

     1  // Copyright 2020 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package deployer
     5  
     6  import (
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/juju/clock"
    11  	"github.com/juju/errors"
    12  	"github.com/juju/loggo"
    13  	"github.com/juju/lumberjack/v2"
    14  	"github.com/juju/names/v5"
    15  	"github.com/juju/utils/v3/voyeur"
    16  	"github.com/juju/version/v2"
    17  	"github.com/juju/worker/v3"
    18  	"github.com/juju/worker/v3/dependency"
    19  	"github.com/prometheus/client_golang/prometheus"
    20  
    21  	"github.com/juju/juju/agent"
    22  	"github.com/juju/juju/agent/addons"
    23  	"github.com/juju/juju/agent/tools"
    24  	"github.com/juju/juju/api/agent/uniter"
    25  	"github.com/juju/juju/api/base"
    26  	"github.com/juju/juju/core/arch"
    27  	"github.com/juju/juju/core/machinelock"
    28  	coreos "github.com/juju/juju/core/os"
    29  	"github.com/juju/juju/core/paths"
    30  	jujuversion "github.com/juju/juju/version"
    31  	"github.com/juju/juju/worker/introspection"
    32  	"github.com/juju/juju/worker/logsender"
    33  )
    34  
    35  // UnitAgent wraps the agent config for this unit.
    36  type UnitAgent struct {
    37  	tag    names.UnitTag
    38  	name   string
    39  	clock  clock.Clock
    40  	logger Logger
    41  
    42  	mu               sync.Mutex
    43  	agentConf        agent.ConfigSetterWriter
    44  	configChangedVal *voyeur.Value
    45  
    46  	setupLogging       func(*loggo.Context, agent.Config)
    47  	unitEngineConfig   func() dependency.EngineConfig
    48  	unitManifolds      func(UnitManifoldsConfig) dependency.Manifolds
    49  	prometheusRegistry *prometheus.Registry
    50  
    51  	// Able to disable running units.
    52  	workerRunning bool
    53  }
    54  
    55  // UnitAgentConfig is a params struct with the values necessary to
    56  // construct a working unit agent.
    57  type UnitAgentConfig struct {
    58  	Name             string
    59  	DataDir          string
    60  	Clock            clock.Clock
    61  	Logger           Logger
    62  	UnitEngineConfig func() dependency.EngineConfig
    63  	UnitManifolds    func(UnitManifoldsConfig) dependency.Manifolds
    64  	SetupLogging     func(*loggo.Context, agent.Config)
    65  }
    66  
    67  // Validate ensures all the required values are set.
    68  func (u *UnitAgentConfig) Validate() error {
    69  	if u.Name == "" {
    70  		return errors.NotValidf("missing Name")
    71  	}
    72  	if u.DataDir == "" {
    73  		return errors.NotValidf("missing DataDir")
    74  	}
    75  	if u.Clock == nil {
    76  		return errors.NotValidf("missing Clock")
    77  	}
    78  	if u.Logger == nil {
    79  		return errors.NotValidf("missing Logger")
    80  	}
    81  	if u.SetupLogging == nil {
    82  		return errors.NotValidf("missing SetupLogging")
    83  	}
    84  	if u.UnitEngineConfig == nil {
    85  		return errors.NotValidf("missing UnitEngineConfig")
    86  	}
    87  	if u.UnitManifolds == nil {
    88  		return errors.NotValidf("missing UnitManifolds")
    89  	}
    90  	return nil
    91  }
    92  
    93  // NewUnitAgent constructs an "agent" that is responsible for
    94  // defining the workers for the unit and wraps access and updates
    95  // to the agent.conf file for the unit. The method expects that there
    96  // is an agent.conf file written in the <datadir>/agents/unit-<name>
    97  // directory. It would be good to remove this need moving forwards
    98  // and have unit agent logging overrides allowable in the machine
    99  // agent config file.
   100  func NewUnitAgent(config UnitAgentConfig) (*UnitAgent, error) {
   101  	if err := config.Validate(); err != nil {
   102  		return nil, errors.Trace(err)
   103  	}
   104  
   105  	// Create a symlink for the unit "agent" binaries.
   106  	// This is used because the uniter is still using the tools directory
   107  	// for the unit agent for creating the jujuc symlinks.
   108  	config.Logger.Tracef("creating symlink for %q to tools directory for jujuc", config.Name)
   109  	current := version.Binary{
   110  		Number:  jujuversion.Current,
   111  		Arch:    arch.HostArch(),
   112  		Release: coreos.HostOSTypeName(),
   113  	}
   114  	tag := names.NewUnitTag(config.Name)
   115  	toolsDir := tools.ToolsDir(config.DataDir, tag.String())
   116  	_, err := tools.ChangeAgentTools(config.DataDir, tag.String(), current)
   117  	defer removeOnErr(&err, config.Logger, toolsDir)
   118  	if err != nil {
   119  		// Any error here is indicative of a disk issue, potentially out of
   120  		// space or inodes. Either way, bouncing the deployer and having the
   121  		// exponential backoff enter play is the right decision.
   122  		return nil, errors.Trace(err)
   123  	}
   124  
   125  	config.Logger.Infof("creating new agent config for %q", config.Name)
   126  	conf, err := agent.ReadConfig(agent.ConfigPath(config.DataDir, tag))
   127  	if err != nil {
   128  		return nil, errors.Trace(err)
   129  	}
   130  	prometheusRegistry, err := addons.NewPrometheusRegistry()
   131  	if err != nil {
   132  		return nil, errors.Trace(err)
   133  	}
   134  	unit := &UnitAgent{
   135  		tag:                tag,
   136  		name:               config.Name,
   137  		clock:              config.Clock,
   138  		logger:             config.Logger,
   139  		agentConf:          conf,
   140  		configChangedVal:   voyeur.NewValue(true),
   141  		setupLogging:       config.SetupLogging,
   142  		unitEngineConfig:   config.UnitEngineConfig,
   143  		unitManifolds:      config.UnitManifolds,
   144  		prometheusRegistry: prometheusRegistry,
   145  	}
   146  	// Update the 'upgradedToVersion' in the agent.conf file if it is
   147  	// different to the current version.
   148  	if conf.UpgradedToVersion() != jujuversion.Current {
   149  		if err := unit.ChangeConfig(func(setter agent.ConfigSetter) error {
   150  			setter.SetUpgradedToVersion(jujuversion.Current)
   151  			return nil
   152  		}); err != nil {
   153  			return nil, errors.Trace(err)
   154  		}
   155  	}
   156  	return unit, nil
   157  }
   158  
   159  func (a *UnitAgent) start() (worker.Worker, error) {
   160  	a.logger.Tracef("starting workers for %q", a.name)
   161  	loggingContext, bufferedLogger, closeLogging, err := a.initLogging()
   162  	if err != nil {
   163  		a.logger.Tracef("init logging failed %s", err)
   164  		return nil, errors.Trace(err)
   165  	}
   166  
   167  	updateAgentConfLogging := func(loggingConfig string) error {
   168  		return a.ChangeConfig(func(setter agent.ConfigSetter) error {
   169  			setter.SetLoggingConfig(loggingConfig)
   170  			return nil
   171  		})
   172  	}
   173  
   174  	machineLock, err := machinelock.New(machinelock.Config{
   175  		AgentName:   a.tag.String(),
   176  		Clock:       a.clock,
   177  		Logger:      loggingContext.GetLogger("juju.machinelock"),
   178  		LogFilename: agent.MachineLockLogFilename(a.agentConf),
   179  	})
   180  	// There will only be an error if the required configuration
   181  	// values are not passed in.
   182  	if err != nil {
   183  		a.logger.Tracef("creating machine lock failed %s", err)
   184  		return nil, errors.Trace(err)
   185  	}
   186  
   187  	// construct unit agent manifold
   188  	a.logger.Tracef("creating unit manifolds for %q", a.name)
   189  	manifolds := a.unitManifolds(UnitManifoldsConfig{
   190  		LoggingContext:      loggingContext,
   191  		Agent:               a,
   192  		LogSource:           bufferedLogger.Logs(),
   193  		LeadershipGuarantee: 30 * time.Second,
   194  		AgentConfigChanged:  a.configChangedVal,
   195  		ValidateMigration:   a.validateMigration,
   196  		UpdateLoggerConfig:  updateAgentConfLogging,
   197  		MachineLock:         machineLock,
   198  		Clock:               a.clock,
   199  	})
   200  	depEngineConfig := a.unitEngineConfig()
   201  	// TODO: tweak IsFatal error func, maybe?
   202  	depEngineConfig.Logger = loggingContext.GetLogger("juju.worker.dependency")
   203  	// Tweak as necessary.
   204  	engine, err := dependency.NewEngine(depEngineConfig)
   205  	if err != nil {
   206  		return nil, err
   207  	}
   208  
   209  	a.logger.Tracef("installing manifolds for %q", a.name)
   210  	if err := dependency.Install(engine, manifolds); err != nil {
   211  		if err := worker.Stop(engine); err != nil {
   212  			a.logger.Errorf("while stopping engine with bad manifolds: %v", err)
   213  		}
   214  		return nil, err
   215  	}
   216  	a.mu.Lock()
   217  	a.workerRunning = true
   218  	a.mu.Unlock()
   219  	go func() {
   220  		// Wait for the worker to finish, then mark not running.
   221  		_ = engine.Wait()
   222  		a.mu.Lock()
   223  		a.workerRunning = false
   224  		closeLogging()
   225  		a.mu.Unlock()
   226  	}()
   227  	if err := addons.StartIntrospection(addons.IntrospectionConfig{
   228  		AgentTag:           a.CurrentConfig().Tag(),
   229  		Engine:             engine,
   230  		NewSocketName:      addons.DefaultIntrospectionSocketName,
   231  		PrometheusGatherer: a.prometheusRegistry,
   232  		MachineLock:        machineLock,
   233  		WorkerFunc:         introspection.NewWorker,
   234  	}); err != nil {
   235  		// If the introspection worker failed to start, we just log error
   236  		// but continue. It is very unlikely to happen in the real world
   237  		// as the only issue is connecting to the abstract domain socket
   238  		// and the agent is controlled by by the OS to only have one.
   239  		a.logger.Errorf("failed to start introspection worker: %v", err)
   240  	}
   241  	a.logger.Tracef("engine for %q running", a.name)
   242  	return engine, nil
   243  }
   244  
   245  func (a *UnitAgent) running() bool {
   246  	a.mu.Lock()
   247  	defer a.mu.Unlock()
   248  	return a.workerRunning
   249  }
   250  
   251  func (a *UnitAgent) initLogging() (*loggo.Context, *logsender.BufferedLogWriter, func(), error) {
   252  	loggingContext := loggo.NewContext(loggo.INFO)
   253  
   254  	logFilename := agent.LogFilename(a.agentConf)
   255  	if err := paths.PrimeLogFile(logFilename); err != nil {
   256  		// This isn't a fatal error so log and continue if priming
   257  		// fails.
   258  		a.logger.Errorf("unable to prime %s (proceeding anyway): %v", logFilename, err)
   259  	}
   260  	ljLogger := &lumberjack.Logger{
   261  		Filename:   logFilename, // eg: "/var/log/juju/unit-mysql-0.log"
   262  		MaxSize:    a.CurrentConfig().AgentLogfileMaxSizeMB(),
   263  		MaxBackups: a.CurrentConfig().AgentLogfileMaxBackups(),
   264  		Compress:   true,
   265  	}
   266  	a.logger.Debugf("created rotating log file %q with max size %d MB and max backups %d",
   267  		ljLogger.Filename, ljLogger.MaxSize, ljLogger.MaxBackups)
   268  	if err := loggingContext.AddWriter(
   269  		"file", loggo.NewSimpleWriter(ljLogger, loggo.DefaultFormatter)); err != nil {
   270  		a.logger.Errorf("unable to configure file logging for unit %q: %v", a.name, err)
   271  	}
   272  
   273  	bufferedLogger, err := logsender.InstallBufferedLogWriter(loggingContext, 1048576)
   274  	if err != nil {
   275  		return nil, nil, nil, errors.Annotate(err, "unable to add buffered log writer")
   276  	}
   277  
   278  	closeLogging := func() {
   279  		if _, err = loggingContext.RemoveWriter("file"); err != nil {
   280  			a.logger.Errorf("%q remove writer: %s", a.name, err)
   281  		}
   282  		bufferedLogger.Close()
   283  		if err = ljLogger.Close(); err != nil {
   284  			a.logger.Errorf("%q lumberjack logger close: %s", a.name, err)
   285  		}
   286  	}
   287  
   288  	// Add line for starting agent to logging context.
   289  	loggingContext.GetLogger("juju").Infof("Starting unit workers for %q", a.name)
   290  	a.setupLogging(loggingContext, a.agentConf)
   291  	return loggingContext, bufferedLogger, closeLogging, nil
   292  }
   293  
   294  // ChangeConfig modifies this configuration using the given mutator.
   295  func (a *UnitAgent) ChangeConfig(change agent.ConfigMutator) error {
   296  	a.mu.Lock()
   297  	defer a.mu.Unlock()
   298  	if err := change(a.agentConf); err != nil {
   299  		return errors.Trace(err)
   300  	}
   301  	if err := a.agentConf.Write(); err != nil {
   302  		return errors.Annotate(err, "cannot write agent configuration")
   303  	}
   304  	a.configChangedVal.Set(true)
   305  	return nil
   306  }
   307  
   308  // CurrentConfig returns the agent config for this agent.
   309  func (a *UnitAgent) CurrentConfig() agent.Config {
   310  	a.mu.Lock()
   311  	defer a.mu.Unlock()
   312  	return a.agentConf.Clone()
   313  }
   314  
   315  // validateMigration is called by the migrationminion to help check
   316  // that the agent will be ok when connected to a new controller.
   317  func (a *UnitAgent) validateMigration(apiCaller base.APICaller) error {
   318  	// TODO(mjs) - more extensive checks to come.
   319  	facade := uniter.NewState(apiCaller, a.tag)
   320  	_, err := facade.Unit(a.tag)
   321  	if err != nil {
   322  		return errors.Trace(err)
   323  	}
   324  	model, err := facade.Model()
   325  	if err != nil {
   326  		return errors.Trace(err)
   327  	}
   328  	curModelUUID := a.CurrentConfig().Model().Id()
   329  	newModelUUID := model.UUID
   330  	if newModelUUID != curModelUUID {
   331  		return errors.Errorf("model mismatch when validating: got %q, expected %q",
   332  			newModelUUID, curModelUUID)
   333  	}
   334  	return nil
   335  }