github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/cmd/jujud/agent/machine.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package agent

import (
	"fmt"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"sync"
	"time"

	"github.com/juju/clock"
	"github.com/juju/cmd"
	"github.com/juju/collections/set"
	"github.com/juju/errors"
	"github.com/juju/gnuflag"
	"github.com/juju/loggo"
	"github.com/juju/os/series"
	"github.com/juju/pubsub"
	"github.com/juju/replicaset"
	"github.com/juju/utils"
	utilscert "github.com/juju/utils/cert"
	"github.com/juju/utils/symlink"
	"github.com/juju/utils/voyeur"
	"github.com/juju/version"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/juju/charmrepo.v3"
	"gopkg.in/juju/names.v2"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/dependency"
	"gopkg.in/mgo.v2"
	"gopkg.in/natefinch/lumberjack.v2"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/agent/tools"
	"github.com/juju/juju/api"
	apiagent "github.com/juju/juju/api/agent"
	"github.com/juju/juju/api/base"
	apideployer "github.com/juju/juju/api/deployer"
	apimachiner "github.com/juju/juju/api/machiner"
	apiprovisioner "github.com/juju/juju/api/provisioner"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/caas"
	"github.com/juju/juju/cert"
	jujucmd "github.com/juju/juju/cmd"
	"github.com/juju/juju/cmd/jujud/agent/machine"
	"github.com/juju/juju/cmd/jujud/agent/model"
	"github.com/juju/juju/cmd/jujud/reboot"
	cmdutil "github.com/juju/juju/cmd/jujud/util"
	"github.com/juju/juju/container"
	"github.com/juju/juju/container/kvm"
	"github.com/juju/juju/core/instance"
	"github.com/juju/juju/core/machinelock"
	"github.com/juju/juju/core/presence"
	"github.com/juju/juju/core/watcher"
	"github.com/juju/juju/environs"
	jujunames "github.com/juju/juju/juju/names"
	"github.com/juju/juju/juju/paths"
	"github.com/juju/juju/mongo"
	"github.com/juju/juju/mongo/mongometrics"
	"github.com/juju/juju/pubsub/centralhub"
	"github.com/juju/juju/service"
	"github.com/juju/juju/service/common"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/multiwatcher"
	"github.com/juju/juju/state/stateenvirons"
	"github.com/juju/juju/storage/looputil"
	"github.com/juju/juju/upgrades"
	jworker "github.com/juju/juju/worker"
	workercommon "github.com/juju/juju/worker/common"
	"github.com/juju/juju/worker/conv2state"
	"github.com/juju/juju/worker/deployer"
	"github.com/juju/juju/worker/gate"
	"github.com/juju/juju/worker/introspection"
	"github.com/juju/juju/worker/logsender"
	"github.com/juju/juju/worker/logsender/logsendermetrics"
	"github.com/juju/juju/worker/migrationmaster"
	"github.com/juju/juju/worker/provisioner"
	psworker "github.com/juju/juju/worker/pubsub"
	"github.com/juju/juju/worker/upgradesteps"
)

var (
	logger           = loggo.GetLogger("juju.cmd.jujud")
	jujuRun          = paths.MustSucceed(paths.JujuRun(series.MustHostSeries()))
	jujuDumpLogs     = paths.MustSucceed(paths.JujuDumpLogs(series.MustHostSeries()))
	jujuIntrospect   = paths.MustSucceed(paths.JujuIntrospect(series.MustHostSeries()))
	jujuUpdateSeries = paths.MustSucceed(paths.JujuUpdateSeries(series.MustHostSeries()))
	jujudSymlinks    = []string{jujuRun, jujuDumpLogs, jujuIntrospect, jujuUpdateSeries}

	// The following are defined as variables to allow the tests to
	// intercept calls to the functions. In every case, they should
	// be expressed as explicit dependencies, but nobody has yet had
	// the intestinal fortitude to untangle this package. Be that
	// person! Juju Needs You.
	useMultipleCPUs   = utils.UseMultipleCPUs
	reportOpenedState = func(*state.State) {}

	caasModelManifolds = model.CAASManifolds
	iaasModelManifolds = model.IAASManifolds
	machineManifolds   = machine.Manifolds
)

// ProductionMongoWriteConcern can be overridden in tests; it defaults to
// true so that production deployments wait for safe, journaled writes.
var ProductionMongoWriteConcern = true

func init() {
	stateWorkerDialOpts = mongo.DefaultDialOpts()
	stateWorkerDialOpts.PostDial = func(session *mgo.Session) error {
		safe := mgo.Safe{}
		if ProductionMongoWriteConcern {
			safe.J = true
			_, err := replicaset.CurrentConfig(session)
			if err == nil {
				// set mongo to write-majority (writes only returned after
				// replicated to a majority of replica-set members).
				safe.WMode = "majority"
			}
		}
		session.SetSafe(&safe)
		return nil
	}
}
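
// A test-side sketch of how the write concern above can be relaxed
// (assumption: a test in this package wants non-journaled writes; the
// helper name is illustrative, not an existing hook):
//
//	func relaxWriteConcernForTest() (restore func()) {
//		ProductionMongoWriteConcern = false
//		return func() { ProductionMongoWriteConcern = true }
//	}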

// AgentInitializer handles initializing a type for use as a Jujud
// agent.
type AgentInitializer interface {
	AddFlags(*gnuflag.FlagSet)
	CheckArgs([]string) error
}

// AgentConfigWriter encapsulates disk I/O operations with the agent
// config.
type AgentConfigWriter interface {
	// ReadConfig reads the config for the given tag from disk.
	ReadConfig(tag string) error
	// ChangeConfig executes the given agent.ConfigMutator in a
	// thread-safe context.
	ChangeConfig(agent.ConfigMutator) error
	// CurrentConfig returns a copy of the in-memory agent config.
	CurrentConfig() agent.Config
}
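
// Callers are expected to funnel every config mutation through
// ChangeConfig so writes stay serialised; a sketch mirroring the
// updateAgentConfLogging closure defined later in this file:
//
//	err := writer.ChangeConfig(func(setter agent.ConfigSetter) error {
//		setter.SetLoggingConfig("<root>=INFO")
//		return nil
//	})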

// NewMachineAgentCmd creates a Command which handles parsing
// command-line arguments and instantiating and running a
// MachineAgent.
func NewMachineAgentCmd(
	ctx *cmd.Context,
	machineAgentFactory func(string) (*MachineAgent, error),
	agentInitializer AgentInitializer,
	configFetcher AgentConfigWriter,
) cmd.Command {
	return &machineAgentCmd{
		ctx:                 ctx,
		machineAgentFactory: machineAgentFactory,
		agentInitializer:    agentInitializer,
		currentConfig:       configFetcher,
	}
}

type machineAgentCmd struct {
	cmd.CommandBase

	// This group of arguments is required.
	agentInitializer    AgentInitializer
	currentConfig       AgentConfigWriter
	machineAgentFactory func(string) (*MachineAgent, error)
	ctx                 *cmd.Context

	// This group is for debugging purposes.
	logToStdErr bool

	// The following are set via command-line flags.
	machineId string
}

// Init is called by the cmd system to initialize the structure for
// running.
func (a *machineAgentCmd) Init(args []string) error {
	if !names.IsValidMachine(a.machineId) {
		return errors.Errorf("--machine-id option must be set, and expects a non-negative integer")
	}
	if err := a.agentInitializer.CheckArgs(args); err != nil {
		return err
	}

	// Due to changes in the logging, and needing to care about old
	// models that have been upgraded, we need to explicitly remove the
	// file writer if one has been added, otherwise we will get duplicate
	// lines of all logging in the log file.
	loggo.RemoveWriter("logfile")

	if a.logToStdErr {
		return nil
	}

	err := a.currentConfig.ReadConfig(names.NewMachineTag(a.machineId).String())
	if err != nil {
		return errors.Annotate(err, "cannot read agent configuration")
	}

	config := a.currentConfig.CurrentConfig()
	// The context's stderr is set as the loggo writer in github.com/juju/cmd/logging.go.
	a.ctx.Stderr = &lumberjack.Logger{
		Filename:   agent.LogFilename(config),
		MaxSize:    300, // megabytes
		MaxBackups: 2,
		Compress:   true,
	}

	return nil
}

// Run instantiates a MachineAgent and runs it.
func (a *machineAgentCmd) Run(c *cmd.Context) error {
	machineAgent, err := a.machineAgentFactory(a.machineId)
	if err != nil {
		return errors.Trace(err)
	}
	return machineAgent.Run(c)
}

// SetFlags adds the requisite flags to run this command.
func (a *machineAgentCmd) SetFlags(f *gnuflag.FlagSet) {
	a.agentInitializer.AddFlags(f)
	f.StringVar(&a.machineId, "machine-id", "", "id of the machine to run")
}
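
// Invocation sketch: with the flag registered above, the agent is
// started as, for example,
//
//	jujud machine --machine-id 0 --data-dir /var/lib/juju
//
// where any flags beyond --machine-id come from the AgentInitializer
// implementation and are assumptions here, not guarantees.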

// Info returns usage information for the command.
func (a *machineAgentCmd) Info() *cmd.Info {
	return jujucmd.Info(&cmd.Info{
		Name:    "machine",
		Purpose: "run a juju machine agent",
	})
}

// MachineAgentFactoryFn returns a function which instantiates a
// MachineAgent given a machineId.
func MachineAgentFactoryFn(
	agentConfWriter AgentConfigWriter,
	bufferedLogger *logsender.BufferedLogWriter,
	newIntrospectionSocketName func(names.Tag) string,
	preUpgradeSteps upgrades.PreUpgradeStepsFunc,
	rootDir string,
) func(string) (*MachineAgent, error) {
	return func(machineId string) (*MachineAgent, error) {
		return NewMachineAgent(
			machineId,
			agentConfWriter,
			bufferedLogger,
			worker.NewRunner(worker.RunnerParams{
				IsFatal:       cmdutil.IsFatal,
				MoreImportant: cmdutil.MoreImportant,
				RestartDelay:  jworker.RestartDelay,
			}),
			looputil.NewLoopDeviceManager(),
			newIntrospectionSocketName,
			preUpgradeSteps,
			rootDir,
		)
	}
}
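
// A hypothetical wiring of the factory with NewMachineAgentCmd above;
// conf, logWriter, socketName and ctx stand in for values built by
// jujud's real entry point, and upgrades.PreUpgradeSteps is assumed to
// satisfy upgrades.PreUpgradeStepsFunc:
//
//	factory := MachineAgentFactoryFn(conf, logWriter, socketName, upgrades.PreUpgradeSteps, "")
//	command := NewMachineAgentCmd(ctx, factory, conf, conf)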

// NewMachineAgent instantiates a new MachineAgent.
func NewMachineAgent(
	machineId string,
	agentConfWriter AgentConfigWriter,
	bufferedLogger *logsender.BufferedLogWriter,
	runner *worker.Runner,
	loopDeviceManager looputil.LoopDeviceManager,
	newIntrospectionSocketName func(names.Tag) string,
	preUpgradeSteps upgrades.PreUpgradeStepsFunc,
	rootDir string,
) (*MachineAgent, error) {
	prometheusRegistry, err := newPrometheusRegistry()
	if err != nil {
		return nil, errors.Trace(err)
	}
	a := &MachineAgent{
		machineId:                   machineId,
		AgentConfigWriter:           agentConfWriter,
		configChangedVal:            voyeur.NewValue(true),
		bufferedLogger:              bufferedLogger,
		workersStarted:              make(chan struct{}),
		dead:                        make(chan struct{}),
		runner:                      runner,
		rootDir:                     rootDir,
		initialUpgradeCheckComplete: gate.NewLock(),
		loopDeviceManager:           loopDeviceManager,
		newIntrospectionSocketName:  newIntrospectionSocketName,
		prometheusRegistry:          prometheusRegistry,
		mongoTxnCollector:           mongometrics.NewTxnCollector(),
		mongoDialCollector:          mongometrics.NewDialCollector(),
		preUpgradeSteps:             preUpgradeSteps,
	}
	if err := a.registerPrometheusCollectors(); err != nil {
		return nil, errors.Trace(err)
	}
	return a, nil
}

func (a *MachineAgent) registerPrometheusCollectors() error {
	agentConfig := a.CurrentConfig()
	if v := agentConfig.Value(agent.MgoStatsEnabled); v == "true" {
		// Enable mgo stats collection only if requested,
		// as it may affect performance.
		mgo.SetStats(true)
		collector := mongometrics.NewMgoStatsCollector(mgo.GetStats)
		if err := a.prometheusRegistry.Register(collector); err != nil {
			return errors.Annotate(err, "registering mgo stats collector")
		}
	}
	if err := a.prometheusRegistry.Register(
		logsendermetrics.BufferedLogWriterMetrics{a.bufferedLogger},
	); err != nil {
		return errors.Annotate(err, "registering logsender collector")
	}
	if err := a.prometheusRegistry.Register(a.mongoTxnCollector); err != nil {
		return errors.Annotate(err, "registering mgo/txn collector")
	}
	if err := a.prometheusRegistry.Register(a.mongoDialCollector); err != nil {
		return errors.Annotate(err, "registering mongo dial collector")
	}
	return nil
}
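
// The registry populated above is the same one later exposed through the
// introspection worker's Prometheus gatherer, so an in-process scrape is
// a plain Gather call:
//
//	families, err := a.prometheusRegistry.Gather()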

// MachineAgent is responsible for tying together all functionality
// needed to orchestrate a Jujud instance which controls a machine.
type MachineAgent struct {
	AgentConfigWriter

	dead             chan struct{}
	errReason        error
	machineId        string
	runner           *worker.Runner
	rootDir          string
	bufferedLogger   *logsender.BufferedLogWriter
	configChangedVal *voyeur.Value
	upgradeComplete  gate.Lock
	workersStarted   chan struct{}
	machineLock      machinelock.Lock

	// Used to signal that the upgrade worker will not
	// reboot the agent on startup because there are no
	// longer any immediately pending agent upgrades.
	initialUpgradeCheckComplete gate.Lock

	mongoInitMutex   sync.Mutex
	mongoInitialized bool

	loopDeviceManager          looputil.LoopDeviceManager
	newIntrospectionSocketName func(names.Tag) string
	prometheusRegistry         *prometheus.Registry
	mongoTxnCollector          *mongometrics.TxnCollector
	mongoDialCollector         *mongometrics.DialCollector
	preUpgradeSteps            upgrades.PreUpgradeStepsFunc

	// Only API servers have hubs. This is temporary until the apiserver and
	// peergrouper have manifolds.
	centralHub *pubsub.StructuredHub
}

// Wait waits for the machine agent to finish.
func (a *MachineAgent) Wait() error {
	<-a.dead
	return a.errReason
}

// Stop stops the machine agent.
func (a *MachineAgent) Stop() error {
	a.runner.Kill()
	return a.Wait()
}

// Done signals the machine agent is finished.
func (a *MachineAgent) Done(err error) {
	a.errReason = err
	close(a.dead)
}

// upgradeCertificateDNSNames ensures that the controller certificate
// recorded in the agent config, and also in the mongo server.pem,
// contains the DNSNames entries required by Juju.
func upgradeCertificateDNSNames(config agent.ConfigSetter) error {
	si, ok := config.StateServingInfo()
	if !ok || si.CAPrivateKey == "" {
		// No certificate information exists yet, nothing to do.
		return nil
	}

	// Validate the current certificate and private key pair, and then
	// extract the current DNS names from the certificate. If the
	// certificate validation fails, or it does not contain the DNS
	// names we require, we will generate a new one.
	var dnsNames set.Strings
	serverCert, _, err := utilscert.ParseCertAndKey(si.Cert, si.PrivateKey)
	if err != nil {
		// The certificate is invalid, so create a new one.
		logger.Infof("parsing certificate/key failed, will generate a new one: %v", err)
		dnsNames = set.NewStrings()
	} else {
		dnsNames = set.NewStrings(serverCert.DNSNames...)
	}

	update := false
	requiredDNSNames := []string{"localhost", "juju-apiserver", "juju-mongodb"}
	for _, dnsName := range requiredDNSNames {
		if dnsNames.Contains(dnsName) {
			continue
		}
		dnsNames.Add(dnsName)
		update = true
	}
	if !update {
		return nil
	}

	// Write a new certificate to the mongo pem and agent config files.
	si.Cert, si.PrivateKey, err = cert.NewDefaultServer(config.CACert(), si.CAPrivateKey, dnsNames.Values())
	if err != nil {
		return err
	}
	if err := mongo.UpdateSSLKey(config.DataDir(), si.Cert, si.PrivateKey); err != nil {
		return err
	}
	config.SetStateServingInfo(si)
	return nil
}

// Run runs a machine agent.
func (a *MachineAgent) Run(*cmd.Context) (err error) {
	// Wrap Done in a closure so it reports the final value of err,
	// rather than err's (nil) value at the time defer is evaluated.
	defer func() { a.Done(err) }()
	useMultipleCPUs()
	if err := a.ReadConfig(a.Tag().String()); err != nil {
		return errors.Errorf("cannot read agent configuration: %v", err)
	}

	setupAgentLogging(a.CurrentConfig())

	if err := introspection.WriteProfileFunctions(); err != nil {
		// This isn't fatal, just annoying.
		logger.Errorf("failed to write profile funcs: %v", err)
	}

	// When the API server and peergrouper have manifolds, they can
	// have dependencies on a central hub worker.
	a.centralHub = centralhub.New(a.Tag().(names.MachineTag))

	// Before doing anything else, we need to make sure the certificate generated for
	// use by mongo to validate controller connections is correct. This needs to be done
	// before any possible restart of the mongo service.
	// See bug http://pad.lv/1434680
	if err := a.AgentConfigWriter.ChangeConfig(upgradeCertificateDNSNames); err != nil {
		return errors.Annotate(err, "error upgrading server certificate")
	}

	agentConfig := a.CurrentConfig()
	agentName := a.Tag().String()
	machineLock, err := machinelock.New(machinelock.Config{
		AgentName:   agentName,
		Clock:       clock.WallClock,
		Logger:      loggo.GetLogger("juju.machinelock"),
		LogFilename: agent.MachineLockLogFilename(agentConfig),
	})
	// There will only be an error if the required configuration
	// values are not passed in.
	if err != nil {
		return errors.Trace(err)
	}
	a.machineLock = machineLock
	a.upgradeComplete = upgradesteps.NewLock(agentConfig)

	createEngine := a.makeEngineCreator(agentName, agentConfig.UpgradedToVersion())
	charmrepo.CacheDir = filepath.Join(agentConfig.DataDir(), "charmcache")
	if err := a.createJujudSymlinks(agentConfig.DataDir()); err != nil {
		return err
	}
	a.runner.StartWorker("engine", createEngine)

	// At this point, all workers will have been configured to start.
	close(a.workersStarted)
	err = a.runner.Wait()
	switch errors.Cause(err) {
	case jworker.ErrTerminateAgent:
		err = a.uninstallAgent()
	case jworker.ErrRebootMachine:
		logger.Infof("Caught reboot error")
		err = a.executeRebootOrShutdown(params.ShouldReboot)
	case jworker.ErrShutdownMachine:
		logger.Infof("Caught shutdown error")
		err = a.executeRebootOrShutdown(params.ShouldShutdown)
	}
	return cmdutil.AgentDone(logger, err)
}

func (a *MachineAgent) makeEngineCreator(agentName string, previousAgentVersion version.Number) func() (worker.Worker, error) {
	return func() (worker.Worker, error) {
		engine, err := dependency.NewEngine(dependencyEngineConfig())
		if err != nil {
			return nil, err
		}
		pubsubReporter := psworker.NewReporter()
		presenceRecorder := presence.New(clock.WallClock)
		updateAgentConfLogging := func(loggingConfig string) error {
			return a.AgentConfigWriter.ChangeConfig(func(setter agent.ConfigSetter) error {
				setter.SetLoggingConfig(loggingConfig)
				return nil
			})
		}
		updateControllerAPIPort := func(port int) error {
			return a.AgentConfigWriter.ChangeConfig(func(setter agent.ConfigSetter) error {
				setter.SetControllerAPIPort(port)
				return nil
			})
		}

		// statePoolReporter is an introspection.IntrospectionReporter,
		// which is set to the current StatePool managed by the state
		// tracker in controller agents.
		var statePoolReporter statePoolIntrospectionReporter
		registerIntrospectionHandlers := func(handle func(path string, h http.Handler)) {
			introspection.RegisterHTTPHandlers(introspection.ReportSources{
				DependencyEngine:   engine,
				StatePool:          &statePoolReporter,
				PubSub:             pubsubReporter,
				PrometheusGatherer: a.prometheusRegistry,
			}, handle)
		}

		// We need to pass this in for the peergrouper, which wants to
		// know whether the controller model supports spaces.
		//
		// TODO(axw) this seems unnecessary, and perhaps even wrong.
		// Even if the provider supports spaces, you could have manual
		// machines in the mix, in which case they won't necessarily
		// be in the same space. I think the peergrouper should just
		// check what spaces the machines are in, rather than trying
		// to short cut anything.
		controllerSupportsSpaces := func(st *state.State) (bool, error) {
			env, err := stateenvirons.GetNewEnvironFunc(environs.New)(st)
			if err != nil {
				return false, errors.Annotate(err, "getting environ from state")
			}
			return environs.SupportsSpaces(state.CallContext(st), env), nil
		}

		manifolds := machineManifolds(machine.ManifoldsConfig{
			PreviousAgentVersion:    previousAgentVersion,
			AgentName:               agentName,
			Agent:                   agent.APIHostPortsSetter{Agent: a},
			RootDir:                 a.rootDir,
			AgentConfigChanged:      a.configChangedVal,
			UpgradeStepsLock:        a.upgradeComplete,
			UpgradeCheckLock:        a.initialUpgradeCheckComplete,
			OpenController:          a.initController,
			OpenStatePool:           a.initState,
			OpenStateForUpgrade:     a.openStateForUpgrade,
			StartAPIWorkers:         a.startAPIWorkers,
			PreUpgradeSteps:         a.preUpgradeSteps,
			LogSource:               a.bufferedLogger.Logs(),
			NewDeployContext:        newDeployContext,
			Clock:                   clock.WallClock,
			ValidateMigration:       a.validateMigration,
			PrometheusRegisterer:    a.prometheusRegistry,
			CentralHub:              a.centralHub,
			PubSubReporter:          pubsubReporter,
			PresenceRecorder:        presenceRecorder,
			UpdateLoggerConfig:      updateAgentConfLogging,
			UpdateControllerAPIPort: updateControllerAPIPort,
			NewAgentStatusSetter: func(apiConn api.Connection) (upgradesteps.StatusSetter, error) {
				return a.machine(apiConn)
			},
			ControllerLeaseDuration:           time.Minute,
			LogPruneInterval:                  5 * time.Minute,
			TransactionPruneInterval:          time.Hour,
			MachineLock:                       a.machineLock,
			SetStatePool:                      statePoolReporter.set,
			RegisterIntrospectionHTTPHandlers: registerIntrospectionHandlers,
			NewModelWorker:                    a.startModelWorkers,
			ControllerSupportsSpaces:          controllerSupportsSpaces,
			MuxShutdownWait:                   1 * time.Minute,
		})
		if err := dependency.Install(engine, manifolds); err != nil {
			if err := worker.Stop(engine); err != nil {
				logger.Errorf("while stopping engine with bad manifolds: %v", err)
			}
			return nil, err
		}
		if err := startIntrospection(introspectionConfig{
			Agent:              a,
			Engine:             engine,
			StatePoolReporter:  &statePoolReporter,
			PubSubReporter:     pubsubReporter,
			MachineLock:        a.machineLock,
			NewSocketName:      a.newIntrospectionSocketName,
			PrometheusGatherer: a.prometheusRegistry,
			PresenceRecorder:   presenceRecorder,
			WorkerFunc:         introspection.NewWorker,
		}); err != nil {
			// If the introspection worker failed to start, we just log the
			// error and continue. It is very unlikely to happen in the real
			// world, as the only issue is connecting to the abstract domain
			// socket, and the agent is constrained by the OS to only have one.
			logger.Errorf("failed to start introspection worker: %v", err)
		}
		return engine, nil
	}
}

func (a *MachineAgent) executeRebootOrShutdown(action params.RebootAction) error {
	// Block until all units/containers are ready, and reboot/shutdown.
	finalize, err := reboot.NewRebootWaiter(a.CurrentConfig())
	if err != nil {
		return errors.Trace(err)
	}

	logger.Infof("Reboot: Executing reboot")
	err = finalize.ExecuteReboot(action)
	if err != nil {
		logger.Infof("Reboot: Error executing reboot: %v", err)
		return errors.Trace(err)
	}
	// On Windows, the shutdown command is asynchronous. We return ErrRebootMachine
	// so the agent will simply exit without error pending reboot/shutdown.
	return jworker.ErrRebootMachine
}

// ChangeConfig applies the given mutator to the agent config and
// signals any workers watching for agent config changes.
func (a *MachineAgent) ChangeConfig(mutate agent.ConfigMutator) error {
	err := a.AgentConfigWriter.ChangeConfig(mutate)
	a.configChangedVal.Set(true)
	return errors.Trace(err)
}

var (
	newEnvirons   = environs.New
	newCAASBroker = caas.New
)

// startAPIWorkers is called to start workers which rely on the
// machine agent's API connection (via the apiworkers manifold). It
// returns a Runner with a number of workers attached to it.
//
// The workers started here need to be converted to run under the
// dependency engine. Once they have all been converted, this method -
// and the apiworkers manifold - can be removed.
func (a *MachineAgent) startAPIWorkers(apiConn api.Connection) (_ worker.Worker, outErr error) {
	agentConfig := a.CurrentConfig()

	apiSt, err := apiagent.NewState(apiConn)
	if err != nil {
		return nil, errors.Trace(err)
	}
	entity, err := apiSt.Entity(a.Tag())
	if err != nil {
		return nil, errors.Trace(err)
	}

	var isModelManager bool
	for _, job := range entity.Jobs() {
		switch job {
		case multiwatcher.JobManageModel:
			isModelManager = true
		default:
			// TODO(dimitern): Once all workers moved over to using
			// the API, report "unknown job type" here.
		}
	}

	runner := worker.NewRunner(worker.RunnerParams{
		IsFatal:       cmdutil.ConnectionIsFatal(logger, apiConn),
		MoreImportant: cmdutil.MoreImportant,
		RestartDelay:  jworker.RestartDelay,
	})
	defer func() {
		// If startAPIWorkers exits early with an error, stop the
		// runner so that any already started runners aren't leaked.
		if outErr != nil {
			worker.Stop(runner)
		}
	}()

	// Perform the operations needed to set up hosting for containers.
	if err := a.setupContainerSupport(runner, apiConn, agentConfig); err != nil {
		cause := errors.Cause(err)
		if params.IsCodeDead(cause) || cause == jworker.ErrTerminateAgent {
			return nil, jworker.ErrTerminateAgent
		}
		return nil, errors.Errorf("setting up container support: %v", err)
	}

	if isModelManager {
		// The bootstrap machine doesn't have its instance info or network
		// config set at provisioning time, so update it now. All the other
		// machines will have instance info, including network config, set
		// when they are provisioned.
		if err := a.setControllerNetworkConfig(apiConn); err != nil {
			return nil, errors.Annotate(err, "setting controller network config")
		}
	} else {
		runner.StartWorker("stateconverter", func() (worker.Worker, error) {
			// TODO(fwereade): this worker needs its own facade.
			facade := apimachiner.NewState(apiConn)
			handler := conv2state.New(facade, a)
			w, err := watcher.NewNotifyWorker(watcher.NotifyConfig{
				Handler: handler,
			})
			if err != nil {
				return nil, errors.Annotate(err, "cannot start controller promoter worker")
			}
			return w, nil
		})
	}
	return runner, nil
}

func (a *MachineAgent) machine(apiConn api.Connection) (*apimachiner.Machine, error) {
	machinerAPI := apimachiner.NewState(apiConn)
	agentConfig := a.CurrentConfig()

	tag := agentConfig.Tag().(names.MachineTag)
	return machinerAPI.Machine(tag)
}

func (a *MachineAgent) setControllerNetworkConfig(apiConn api.Connection) error {
	machine, err := a.machine(apiConn)
	if errors.IsNotFound(err) || (err == nil && machine.Life() == params.Dead) {
		return jworker.ErrTerminateAgent
	}
	if err != nil {
		return errors.Annotatef(err, "cannot load machine %s from state", a.CurrentConfig().Tag())
	}

	if err := machine.SetProviderNetworkConfig(); err != nil {
		return errors.Annotate(err, "cannot set controller provider network config")
	}
	return nil
}

// Restart restarts the agent's service.
func (a *MachineAgent) Restart() error {
	name := a.CurrentConfig().Value(agent.AgentServiceName)
	return service.Restart(name)
}
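
// The service name read by Restart is stored in the agent config under
// agent.AgentServiceName; for machine 0 it is conventionally
// "jujud-machine-0".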

// openStateForUpgrade exists to be passed into the upgradesteps
// worker. The upgradesteps worker opens state independently of the
// state worker so that it isn't affected by the state worker's
// lifetime. It ensures the MongoDB server is configured and started,
// and then opens a state connection.
//
// TODO(mjs)- review the need for this once the dependency engine is
// in use. Why can't upgradesteps depend on the main state connection?
func (a *MachineAgent) openStateForUpgrade() (*state.StatePool, error) {
	agentConfig := a.CurrentConfig()
	if err := a.ensureMongoServer(agentConfig); err != nil {
		return nil, errors.Trace(err)
	}
	info, ok := agentConfig.MongoInfo()
	if !ok {
		return nil, errors.New("no state info available")
	}
	dialOpts, err := mongoDialOptions(
		mongo.DefaultDialOpts(),
		agentConfig,
		a.mongoDialCollector,
	)
	if err != nil {
		return nil, errors.Trace(err)
	}
	session, err := mongo.DialWithInfo(*info, dialOpts)
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer session.Close()

	pool, err := state.OpenStatePool(state.OpenParams{
		Clock:              clock.WallClock,
		ControllerTag:      agentConfig.Controller(),
		ControllerModelTag: agentConfig.Model(),
		MongoSession:       session,
		NewPolicy:          stateenvirons.GetNewPolicyFunc(),
		// state.InitDatabase is idempotent and needs to be called just
		// prior to performing any upgrades since a new Juju binary may
		// declare new indices or explicit collections.
		// NB until https://jira.mongodb.org/browse/SERVER-1864 is resolved,
		// it is not possible to resize capped collections so there's no
		// point in reading existing controller config from state in order
		// to pass in the max-txn-log-size value.
		InitDatabaseFunc:       state.InitDatabase,
		RunTransactionObserver: a.mongoTxnCollector.AfterRunTransaction,
	})
	if err != nil {
		return nil, errors.Trace(err)
	}
	return pool, nil
}

// validateMigration is called by the migrationminion to help check
// that the agent will be ok when connected to a new controller.
func (a *MachineAgent) validateMigration(apiCaller base.APICaller) error {
	// TODO(mjs) - more extensive checks to come.
	facade := apimachiner.NewState(apiCaller)
	_, err := facade.Machine(names.NewMachineTag(a.machineId))
	return errors.Trace(err)
}

// setupContainerSupport determines what containers can be run on this machine and
// initialises suitable infrastructure to support such containers.
func (a *MachineAgent) setupContainerSupport(runner *worker.Runner, st api.Connection, agentConfig agent.Config) error {
	var supportedContainers []instance.ContainerType
	supportsContainers := container.ContainersSupported()
	if supportsContainers {
		supportedContainers = append(supportedContainers, instance.LXD)
	}

	supportsKvm, err := kvm.IsKVMSupported()
	if err != nil {
		logger.Warningf("determining kvm support: %v\nno kvm containers possible", err)
	}
	if err == nil && supportsKvm {
		supportedContainers = append(supportedContainers, instance.KVM)
	}

	return a.updateSupportedContainers(runner, st, supportedContainers, agentConfig)
}

// updateSupportedContainers records in state that a machine can run the specified containers.
// It starts a watcher and when a container of a given type is first added to the machine,
// the watcher is killed, the machine is set up to be able to start containers of the given type,
// and a suitable provisioner is started.
func (a *MachineAgent) updateSupportedContainers(
	runner *worker.Runner,
	st api.Connection,
	containers []instance.ContainerType,
	agentConfig agent.Config,
) error {
	pr := apiprovisioner.NewState(st)
	tag := agentConfig.Tag().(names.MachineTag)
	result, err := pr.Machines(tag)
	if err != nil {
		return errors.Annotatef(err, "cannot load machine %s from state", tag)
	}
	if len(result) != 1 {
		// err is nil at this point, so build a fresh error rather than
		// annotating it (annotating a nil error would return nil).
		return errors.Errorf("expected 1 result, got %d", len(result))
	}
	if errors.IsNotFound(result[0].Err) || (result[0].Err == nil && result[0].Machine.Life() == params.Dead) {
		return jworker.ErrTerminateAgent
	}
	machine := result[0].Machine
	if len(containers) == 0 {
		if err := machine.SupportsNoContainers(); err != nil {
			return errors.Annotatef(err, "clearing supported containers for %s", tag)
		}
		return nil
	}
	if err := machine.SetSupportedContainers(containers...); err != nil {
		return errors.Annotatef(err, "setting supported containers for %s", tag)
	}
	// Start the watcher to fire when a container is first requested on the machine.
	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())

	credentialAPI, err := workercommon.NewCredentialInvalidatorFacade(st)
	if err != nil {
		return errors.Annotatef(err, "cannot get credential invalidator facade")
	}
	params := provisioner.ContainerSetupParams{
		Runner:              runner,
		WorkerName:          watcherName,
		SupportedContainers: containers,
		Machine:             machine,
		Provisioner:         pr,
		Config:              agentConfig,
		MachineLock:         a.machineLock,
		CredentialAPI:       credentialAPI,
	}
	handler := provisioner.NewContainerSetupHandler(params)
	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
		w, err := watcher.NewStringsWorker(watcher.StringsConfig{
			Handler: handler,
		})
		if err != nil {
			return nil, errors.Annotatef(err, "cannot start %s worker", watcherName)
		}
		return w, nil
	})
	return nil
}

func mongoDialOptions(
	baseOpts mongo.DialOpts,
	agentConfig agent.Config,
	mongoDialCollector *mongometrics.DialCollector,
) (mongo.DialOpts, error) {
	dialOpts := baseOpts
	if limitStr := agentConfig.Value("MONGO_SOCKET_POOL_LIMIT"); limitStr != "" {
		limit, err := strconv.Atoi(limitStr)
		if err != nil {
			return mongo.DialOpts{}, errors.Errorf("invalid mongo socket pool limit %q", limitStr)
		}
		logger.Infof("using mongo socket pool limit = %d", limit)
		dialOpts.PoolLimit = limit
	}
	if dialOpts.PostDialServer != nil {
		return mongo.DialOpts{}, errors.New("did not expect PostDialServer to be set")
	}
	dialOpts.PostDialServer = mongoDialCollector.PostDialServer
	return dialOpts, nil
}
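
// A sketch of exercising the pool-limit branch above from a test,
// assuming agent.ConfigSetter's SetValue is available for writing the
// raw key (the value is illustrative):
//
//	setter.SetValue("MONGO_SOCKET_POOL_LIMIT", "100")
//	opts, err := mongoDialOptions(mongo.DefaultDialOpts(), cfg, collector)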

func (a *MachineAgent) initController(agentConfig agent.Config) (*state.Controller, error) {
	info, ok := agentConfig.MongoInfo()
	if !ok {
		return nil, errors.Errorf("no state info available")
	}

	// Start MongoDB server and dial.
	if err := a.ensureMongoServer(agentConfig); err != nil {
		return nil, err
	}
	dialOpts, err := mongoDialOptions(
		stateWorkerDialOpts,
		agentConfig,
		a.mongoDialCollector,
	)
	if err != nil {
		return nil, errors.Trace(err)
	}
	session, err := mongo.DialWithInfo(*info, dialOpts)
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer session.Close()

	ctlr, err := state.OpenController(state.OpenParams{
		Clock:                  clock.WallClock,
		ControllerTag:          agentConfig.Controller(),
		ControllerModelTag:     agentConfig.Model(),
		MongoSession:           session,
		NewPolicy:              stateenvirons.GetNewPolicyFunc(),
		RunTransactionObserver: a.mongoTxnCollector.AfterRunTransaction,
	})
	return ctlr, nil
}

func (a *MachineAgent) initState(agentConfig agent.Config) (*state.StatePool, error) {
	// Start MongoDB server and dial.
	if err := a.ensureMongoServer(agentConfig); err != nil {
		return nil, err
	}

	dialOpts, err := mongoDialOptions(
		stateWorkerDialOpts,
		agentConfig,
		a.mongoDialCollector,
	)
	if err != nil {
		return nil, errors.Trace(err)
	}
	pool, _, err := openStatePool(
		agentConfig,
		dialOpts,
		a.mongoTxnCollector.AfterRunTransaction,
	)
	if err != nil {
		return nil, err
	}

	reportOpenedState(pool.SystemState())

	return pool, nil
}

// startModelWorkers starts the set of workers that run for every model
// in each controller, both IAAS and CAAS.
func (a *MachineAgent) startModelWorkers(modelUUID string, modelType state.ModelType) (worker.Worker, error) {
	controllerUUID := a.CurrentConfig().Controller().Id()
	modelAgent, err := model.WrapAgent(a, controllerUUID, modelUUID)
	if err != nil {
		return nil, errors.Trace(err)
	}
	config := dependencyEngineConfig()
	config.IsFatal = model.IsFatal
	config.WorstError = model.WorstError
	config.Filter = model.IgnoreErrRemoved
	engine, err := dependency.NewEngine(config)
	if err != nil {
		return nil, errors.Trace(err)
	}

	manifoldsCfg := model.ManifoldsConfig{
		Agent:                       modelAgent,
		AgentConfigChanged:          a.configChangedVal,
		Clock:                       clock.WallClock,
		RunFlagDuration:             time.Minute,
		CharmRevisionUpdateInterval: 24 * time.Hour,
		InstPollerAggregationDelay:  3 * time.Second,
		StatusHistoryPrunerInterval: 5 * time.Minute,
		ActionPrunerInterval:        24 * time.Hour,
		NewEnvironFunc:              newEnvirons,
		NewContainerBrokerFunc:      newCAASBroker,
		NewMigrationMaster:          migrationmaster.NewWorker,
	}
	var manifolds dependency.Manifolds
	if modelType == state.ModelTypeIAAS {
		manifolds = iaasModelManifolds(manifoldsCfg)
	} else {
		manifolds = caasModelManifolds(manifoldsCfg)
	}
	if err := dependency.Install(engine, manifolds); err != nil {
		if err := worker.Stop(engine); err != nil {
			logger.Errorf("while stopping engine with bad manifolds: %v", err)
		}
		return nil, errors.Trace(err)
	}
	return engine, nil
}

// stateWorkerDialOpts is a mongo.DialOpts suitable
// for use by StateWorker to dial mongo.
//
// This must be overridden in tests, as it assumes
// journaling is enabled.
var stateWorkerDialOpts mongo.DialOpts

// ensureMongoServer ensures that mongo is installed and running,
// and ready for opening a state connection.
func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) (err error) {
	a.mongoInitMutex.Lock()
	defer a.mongoInitMutex.Unlock()
	if a.mongoInitialized {
		logger.Debugf("mongo is already initialized")
		return nil
	}
	defer func() {
		if err == nil {
			a.mongoInitialized = true
		}
	}()

	// EnsureMongoServer installs/upgrades the init config as necessary.
	ensureServerParams, err := cmdutil.NewEnsureServerParams(agentConfig)
	if err != nil {
		return err
	}
	var mongodVersion mongo.Version
	if mongodVersion, err = cmdutil.EnsureMongoServer(ensureServerParams); err != nil {
		return err
	}
	logger.Debugf("mongodb service is installed")

	// Mongo is installed, record the version.
	err = a.ChangeConfig(func(config agent.ConfigSetter) error {
		config.SetMongoVersion(mongodVersion)
		return nil
	})
	if err != nil {
		return errors.Annotate(err, "cannot set mongo version")
	}
	return nil
}

func openStatePool(
	agentConfig agent.Config,
	dialOpts mongo.DialOpts,
	runTransactionObserver state.RunTransactionObserverFunc,
) (_ *state.StatePool, _ *state.Machine, err error) {
	info, ok := agentConfig.MongoInfo()
	if !ok {
		return nil, nil, errors.Errorf("no state info available")
	}
	session, err := mongo.DialWithInfo(*info, dialOpts)
	if err != nil {
		return nil, nil, errors.Trace(err)
	}
	defer session.Close()

	pool, err := state.OpenStatePool(state.OpenParams{
		Clock:                  clock.WallClock,
		ControllerTag:          agentConfig.Controller(),
		ControllerModelTag:     agentConfig.Model(),
		MongoSession:           session,
		NewPolicy:              stateenvirons.GetNewPolicyFunc(),
		RunTransactionObserver: runTransactionObserver,
	})
	if err != nil {
		return nil, nil, err
	}
	defer func() {
		if err != nil {
			pool.Close()
		}
	}()
	st := pool.SystemState()
	m0, err := st.FindEntity(agentConfig.Tag())
	if err != nil {
		if errors.IsNotFound(err) {
			err = jworker.ErrTerminateAgent
		}
		return nil, nil, err
	}
	m := m0.(*state.Machine)
	if m.Life() == state.Dead {
		return nil, nil, jworker.ErrTerminateAgent
	}
	// Check the machine nonce as provisioned matches the agent.Conf value.
	if !m.CheckProvisioned(agentConfig.Nonce()) {
		// The agent is running on a different machine to the one it
		// should be according to state. It must stop immediately.
		logger.Errorf("running machine %v agent on inappropriate instance", m)
		return nil, nil, jworker.ErrTerminateAgent
	}
	return pool, m, nil
}

// startWorkerAfterUpgrade starts a worker to run the specified child worker
// but only after waiting for upgrades to complete.
func (a *MachineAgent) startWorkerAfterUpgrade(runner jworker.Runner, name string, start func() (worker.Worker, error)) {
	runner.StartWorker(name, func() (worker.Worker, error) {
		return a.upgradeWaiterWorker(name, start), nil
	})
}

// upgradeWaiterWorker runs the specified worker after upgrades have completed.
func (a *MachineAgent) upgradeWaiterWorker(name string, start func() (worker.Worker, error)) worker.Worker {
	return jworker.NewSimpleWorker(func(stop <-chan struct{}) error {
		// Wait for the agent upgrade and upgrade steps to complete (or for us to be stopped).
		for _, ch := range []<-chan struct{}{
			a.upgradeComplete.Unlocked(),
			a.initialUpgradeCheckComplete.Unlocked(),
		} {
			select {
			case <-stop:
				return nil
			case <-ch:
			}
		}
		logger.Debugf("upgrades done, starting worker %q", name)

		// Upgrades are done, start the worker.
		w, err := start()
		if err != nil {
			return err
		}
		// Wait for worker to finish or for us to be stopped.
		done := make(chan error, 1)
		go func() {
			done <- w.Wait()
		}()
		select {
		case err := <-done:
			return errors.Annotatef(err, "worker %q exited", name)
		case <-stop:
			logger.Debugf("stopping so killing worker %q", name)
			return worker.Stop(w)
		}
	})
}

// WorkersStarted returns a channel that's closed once all top level workers
// have been started. This is provided for testing purposes.
func (a *MachineAgent) WorkersStarted() <-chan struct{} {
	return a.workersStarted
}

// Tag returns the tag of the machine this agent is running for.
func (a *MachineAgent) Tag() names.Tag {
	return names.NewMachineTag(a.machineId)
}

func (a *MachineAgent) createJujudSymlinks(dataDir string) error {
	jujud := filepath.Join(tools.ToolsDir(dataDir, a.Tag().String()), jujunames.Jujud)
	for _, link := range jujudSymlinks {
		err := a.createSymlink(jujud, link)
		if err != nil {
			return errors.Annotatef(err, "failed to create %s symlink", link)
		}
	}
	return nil
}

func (a *MachineAgent) createSymlink(target, link string) error {
	fullLink := utils.EnsureBaseDir(a.rootDir, link)

	currentTarget, err := symlink.Read(fullLink)
	if err != nil && !os.IsNotExist(err) {
		return err
	} else if err == nil {
		// Link already in place - check it.
		if currentTarget == target {
			// Link already points to the right place - nothing to do.
			return nil
		}
		// Link points to the wrong place - delete it.
		if err := os.Remove(fullLink); err != nil {
			return err
		}
	}

	if err := os.MkdirAll(filepath.Dir(fullLink), os.FileMode(0755)); err != nil {
		return err
	}
	return symlink.New(target, fullLink)
}

func (a *MachineAgent) removeJujudSymlinks() (errs []error) {
	for _, link := range jujudSymlinks {
		err := os.Remove(utils.EnsureBaseDir(a.rootDir, link))
		if err != nil && !os.IsNotExist(err) {
			errs = append(errs, errors.Annotatef(err, "failed to remove %s symlink", link))
		}
	}
	return
}

func (a *MachineAgent) uninstallAgent() error {
	// We should only uninstall if the uninstall file is present.
	if !agent.CanUninstall(a) {
		logger.Infof("ignoring uninstall request")
		return nil
	}
	logger.Infof("uninstalling agent")

	agentConfig := a.CurrentConfig()
	var errs []error
	agentServiceName := agentConfig.Value(agent.AgentServiceName)
	if agentServiceName == "" {
		// For backwards compatibility, handle lack of AgentServiceName.
		agentServiceName = os.Getenv("UPSTART_JOB")
	}

	if agentServiceName != "" {
		svc, err := service.DiscoverService(agentServiceName, common.Conf{})
		if err != nil {
			errs = append(errs, errors.Errorf("cannot remove service %q: %v", agentServiceName, err))
		} else if err := svc.Remove(); err != nil {
			errs = append(errs, errors.Errorf("cannot remove service %q: %v", agentServiceName, err))
		}
	}

	errs = append(errs, a.removeJujudSymlinks()...)

	// TODO(fwereade): surely this shouldn't be happening here? Once we're
	// at this point we should expect to be killed in short order; if this
	// work is remotely important we should be blocking machine death on
	// its completion.
	insideContainer := container.RunningInContainer()
	if insideContainer {
		// We're running inside a container, so loop devices may leak. Detach
		// any loop devices that are backed by files on this machine.
		if err := a.loopDeviceManager.DetachLoopDevices("/", agentConfig.DataDir()); err != nil {
			errs = append(errs, err)
		}
	}

	if err := mongo.RemoveService(); err != nil {
		errs = append(errs, errors.Annotate(err, "cannot stop/remove mongo service"))
	}
	if err := os.RemoveAll(agentConfig.DataDir()); err != nil {
		errs = append(errs, err)
	}
	if len(errs) == 0 {
		return nil
	}
	return errors.Errorf("uninstall failed: %v", errs)
}

// newDeployContext gives the tests the opportunity to create a deployer.Context
// that can be used for testing, so as to (1) avoid deploying units to the
// system running the tests and (2) get access to the *State used internally,
// so that tests can be run without waiting for the 5s watcher refresh time to
// which we would otherwise be restricted.
var newDeployContext = func(st *apideployer.State, agentConfig agent.Config) deployer.Context {
	return deployer.NewSimpleContext(agentConfig, st)
}