
     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     4  package agent
     6  import (
     7  	"fmt"
     8  	"io"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strconv"
    14  	"sync"
    15  	"time"
    17  	""
    18  	""
    19  	apiagent ""
    20  	apimachiner ""
    21  	""
    22  	""
    23  	""
    24  	""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    38  	""
    39  	""
    40  	""
    41  	apideployer ""
    42  	""
    43  	""
    44  	""
    45  	""
    46  	""
    47  	""
    48  	""
    49  	cmdutil ""
    50  	""
    51  	""
    52  	""
    53  	""
    54  	""
    55  	jujunames ""
    56  	""
    57  	""
    58  	""
    59  	""
    60  	""
    61  	""
    62  	""
    63  	""
    64  	""
    65  	jujuversion ""
    66  	""
    67  	""
    68  	""
    69  	""
    70  	""
    71  	""
    72  	""
    73  	""
    74  	""
    75  	""
    76  	""
    77  	""
    78  	""
    79  	""
    80  	""
    81  	""
    82  	""
    83  	""
    84  )
// Package-level values used by the machine agent code in this file.
// logger reports under the "juju.cmd.jujud" module name; jujuRun and
// jujuDumpLogs are looked up for the host series when the package is
// initialised and wrapped in paths.MustSucceed.
var (
	logger       = loggo.GetLogger("juju.cmd.jujud")
	jujuRun      = paths.MustSucceed(paths.JujuRun(series.HostSeries()))
	jujuDumpLogs = paths.MustSucceed(paths.JujuDumpLogs(series.HostSeries()))
	// The following are defined as variables to allow the tests to
	// intercept calls to the functions. reportOpenedState defaults to
	// a no-op and is called by initState with the state it has just
	// opened.
	useMultipleCPUs       = utils.UseMultipleCPUs
	modelManifolds        = model.Manifolds
	newSingularRunner     = singular.New
	peergrouperNew        = peergrouper.New
	newCertificateUpdater = certupdater.NewCertificateUpdater
	newMetadataUpdater    = imagemetadataworker.NewWorker
	newUpgradeMongoWorker = mongoupgrader.New
	reportOpenedState     = func(io.Closer) {}
)
// ProductionMongoWriteConcern is a variable to override in tests; the
// default is true. When true, the PostDial hook installed by init
// enables journaled writes and, if a replica set configuration can be
// read from the session, the "majority" write mode.
var ProductionMongoWriteConcern = true
   106  func init() {
   107  	stateWorkerDialOpts = mongo.DefaultDialOpts()
   108  	stateWorkerDialOpts.PostDial = func(session *mgo.Session) error {
   109  		safe := mgo.Safe{}
   110  		if ProductionMongoWriteConcern {
   111  			safe.J = true
   112  			_, err := replicaset.CurrentConfig(session)
   113  			if err == nil {
   114  				// set mongo to write-majority (writes only returned after
   115  				// replicated to a majority of replica-set members).
   116  				safe.WMode = "majority"
   117  			}
   118  		}
   119  		session.SetSafe(&safe)
   120  		return nil
   121  	}
   122  }
// AgentInitializer handles initializing a type for use as a Jujud
// agent.
type AgentInitializer interface {
	// AddFlags is called from machineAgentCmd.SetFlags with the
	// command's flag set, before the command adds its own flags.
	AddFlags(*gnuflag.FlagSet)
	// CheckArgs is called from machineAgentCmd.Init with the command's
	// arguments; a non-nil error is returned from Init unchanged.
	CheckArgs([]string) error
}
// AgentConfigWriter encapsulates disk I/O operations with the agent
// config. MachineAgent embeds an AgentConfigWriter and overrides
// ChangeConfig to also signal that the configuration changed.
type AgentConfigWriter interface {
	// ReadConfig reads the config for the given tag from disk.
	ReadConfig(tag string) error
	// ChangeConfig executes the given agent.ConfigMutator in a
	// thread-safe context.
	ChangeConfig(agent.ConfigMutator) error
	// CurrentConfig returns a copy of the in-memory agent config.
	CurrentConfig() agent.Config
}
   143  // NewMachineAgentCmd creates a Command which handles parsing
   144  // command-line arguments and instantiating and running a
   145  // MachineAgent.
   146  func NewMachineAgentCmd(
   147  	ctx *cmd.Context,
   148  	machineAgentFactory func(string) *MachineAgent,
   149  	agentInitializer AgentInitializer,
   150  	configFetcher AgentConfigWriter,
   151  ) cmd.Command {
   152  	return &machineAgentCmd{
   153  		ctx:                 ctx,
   154  		machineAgentFactory: machineAgentFactory,
   155  		agentInitializer:    agentInitializer,
   156  		currentConfig:       configFetcher,
   157  	}
   158  }
// machineAgentCmd is the command built by NewMachineAgentCmd. Init
// validates its arguments and sets up logging, and Run creates a
// MachineAgent for machineId through machineAgentFactory and runs it.
//
// logToStdErr is not assigned anywhere in the visible part of this
// file; when it is true Init leaves the context's Stderr alone instead
// of redirecting it to a log file.
type machineAgentCmd struct {
	cmd.CommandBase
	// This group of arguments is required.
	agentInitializer    AgentInitializer
	currentConfig       AgentConfigWriter
	machineAgentFactory func(string) *MachineAgent
	ctx                 *cmd.Context
	// This group is for debugging purposes.
	logToStdErr bool
	// The following are set via command-line flags.
	machineId string
}
   176  // Init is called by the cmd system to initialize the structure for
   177  // running.
   178  func (a *machineAgentCmd) Init(args []string) error {
   180  	if !names.IsValidMachine(a.machineId) {
   181  		return fmt.Errorf("--machine-id option must be set, and expects a non-negative integer")
   182  	}
   183  	if err := a.agentInitializer.CheckArgs(args); err != nil {
   184  		return err
   185  	}
   187  	// Due to changes in the logging, and needing to care about old
   188  	// models that have been upgraded, we need to explicitly remove the
   189  	// file writer if one has been added, otherwise we will get duplicate
   190  	// lines of all logging in the log file.
   191  	loggo.RemoveWriter("logfile")
   193  	if a.logToStdErr {
   194  		return nil
   195  	}
   197  	err := a.currentConfig.ReadConfig(names.NewMachineTag(a.machineId).String())
   198  	if err != nil {
   199  		return errors.Annotate(err, "cannot read agent configuration")
   200  	}
   202  	// the context's stderr is set as the loggo writer in
   203  	a.ctx.Stderr = &lumberjack.Logger{
   204  		Filename:   agent.LogFilename(a.currentConfig.CurrentConfig()),
   205  		MaxSize:    300, // megabytes
   206  		MaxBackups: 2,
   207  	}
   209  	return nil
   210  }
   212  // Run instantiates a MachineAgent and runs it.
   213  func (a *machineAgentCmd) Run(c *cmd.Context) error {
   214  	machineAgent := a.machineAgentFactory(a.machineId)
   215  	return machineAgent.Run(c)
   216  }
   218  // SetFlags adds the requisite flags to run this command.
   219  func (a *machineAgentCmd) SetFlags(f *gnuflag.FlagSet) {
   220  	a.agentInitializer.AddFlags(f)
   221  	f.StringVar(&a.machineId, "machine-id", "", "id of the machine to run")
   222  }
   224  // Info returns usage information for the command.
   225  func (a *machineAgentCmd) Info() *cmd.Info {
   226  	return &cmd.Info{
   227  		Name:    "machine",
   228  		Purpose: "run a juju machine agent",
   229  	}
   230  }
   232  // MachineAgentFactoryFn returns a function which instantiates a
   233  // MachineAgent given a machineId.
   234  func MachineAgentFactoryFn(
   235  	agentConfWriter AgentConfigWriter,
   236  	bufferedLogs logsender.LogRecordCh,
   237  	rootDir string,
   238  ) func(string) *MachineAgent {
   239  	return func(machineId string) *MachineAgent {
   240  		return NewMachineAgent(
   241  			machineId,
   242  			agentConfWriter,
   243  			bufferedLogs,
   244  			worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant, worker.RestartDelay),
   245  			looputil.NewLoopDeviceManager(),
   246  			rootDir,
   247  		)
   248  	}
   249  }
   251  // NewMachineAgent instantiates a new MachineAgent.
   252  func NewMachineAgent(
   253  	machineId string,
   254  	agentConfWriter AgentConfigWriter,
   255  	bufferedLogs logsender.LogRecordCh,
   256  	runner worker.Runner,
   257  	loopDeviceManager looputil.LoopDeviceManager,
   258  	rootDir string,
   259  ) *MachineAgent {
   260  	return &MachineAgent{
   261  		machineId:                   machineId,
   262  		AgentConfigWriter:           agentConfWriter,
   263  		configChangedVal:            voyeur.NewValue(true),
   264  		bufferedLogs:                bufferedLogs,
   265  		workersStarted:              make(chan struct{}),
   266  		runner:                      runner,
   267  		rootDir:                     rootDir,
   268  		initialUpgradeCheckComplete: gate.NewLock(),
   269  		loopDeviceManager:           loopDeviceManager,
   270  	}
   271  }
// MachineAgent is responsible for tying together all functionality
// needed to orchestrate a Jujud instance which controls a machine.
//
// Fields, as used in this file:
//   - tomb records the agent's final error; Run kills it and marks it
//     done, Wait and Stop wait on it.
//   - runner runs the "engine" worker started by Run.
//   - rootDir and bufferedLogs are passed on to the machine manifolds
//     (as RootDir and LogSource).
//   - configChangedVal is set to true by ChangeConfig.
//   - upgradeComplete is assigned in Run from upgradesteps.NewLock.
//   - workersStarted is closed by Run once the engine worker has been
//     handed to the runner.
//   - mongoInitMutex presumably guards mongoInitialized; the locking
//     code is outside the visible part of this file — confirm.
type MachineAgent struct {
	AgentConfigWriter
	tomb             tomb.Tomb
	machineId        string
	runner           worker.Runner
	rootDir          string
	bufferedLogs     logsender.LogRecordCh
	configChangedVal *voyeur.Value
	upgradeComplete  gate.Lock
	workersStarted   chan struct{}
	// XXX(fwereade): these smell strongly of goroutine-unsafeness.
	restoreMode bool
	restoring   bool
	// Used to signal that the upgrade worker will not
	// reboot the agent on startup because there are no
	// longer any immediately pending agent upgrades.
	initialUpgradeCheckComplete gate.Lock
	discoverSpacesComplete gate.Lock
	mongoInitMutex   sync.Mutex
	mongoInitialized bool
	loopDeviceManager looputil.LoopDeviceManager
}
   304  // IsRestorePreparing returns bool representing if we are in restore mode
   305  // but not running restore.
   306  func (a *MachineAgent) IsRestorePreparing() bool {
   307  	return a.restoreMode && !a.restoring
   308  }
   310  // IsRestoreRunning returns bool representing if we are in restore mode
   311  // and running the actual restore process.
   312  func (a *MachineAgent) IsRestoreRunning() bool {
   313  	return a.restoring
   314  }
   316  func (a *MachineAgent) isUpgradeRunning() bool {
   317  	return !a.upgradeComplete.IsUnlocked()
   318  }
   320  func (a *MachineAgent) isInitialUpgradeCheckPending() bool {
   321  	return !a.initialUpgradeCheckComplete.IsUnlocked()
   322  }
   324  // Wait waits for the machine agent to finish.
   325  func (a *MachineAgent) Wait() error {
   326  	return a.tomb.Wait()
   327  }
   329  // Stop stops the machine agent.
   330  func (a *MachineAgent) Stop() error {
   331  	a.runner.Kill()
   332  	return a.tomb.Wait()
   333  }
   335  // upgradeCertificateDNSNames ensure that the controller certificate
   336  // recorded in the agent config and also mongo server.pem contains the
   337  // DNSNames entires required by Juju/
   338  func (a *MachineAgent) upgradeCertificateDNSNames() error {
   339  	agentConfig := a.CurrentConfig()
   340  	si, ok := agentConfig.StateServingInfo()
   341  	if !ok || si.CAPrivateKey == "" {
   342  		// No certificate information exists yet, nothing to do.
   343  		return nil
   344  	}
   345  	// Parse the current certificate to get the current dns names.
   346  	serverCert, err := cert.ParseCert(si.Cert)
   347  	if err != nil {
   348  		return err
   349  	}
   350  	update := false
   351  	dnsNames := set.NewStrings(serverCert.DNSNames...)
   352  	requiredDNSNames := []string{"local", "juju-apiserver", "juju-mongodb"}
   353  	for _, dnsName := range requiredDNSNames {
   354  		if dnsNames.Contains(dnsName) {
   355  			continue
   356  		}
   357  		dnsNames.Add(dnsName)
   358  		update = true
   359  	}
   360  	if !update {
   361  		return nil
   362  	}
   363  	// Write a new certificate to the mongo pem and agent config files.
   364  	si.Cert, si.PrivateKey, err = cert.NewDefaultServer(agentConfig.CACert(), si.CAPrivateKey, dnsNames.Values())
   365  	if err != nil {
   366  		return err
   367  	}
   368  	if err := mongo.UpdateSSLKey(agentConfig.DataDir(), si.Cert, si.PrivateKey); err != nil {
   369  		return err
   370  	}
   371  	return a.AgentConfigWriter.ChangeConfig(func(config agent.ConfigSetter) error {
   372  		config.SetStateServingInfo(si)
   373  		return nil
   374  	})
   375  }
   377  // Run runs a machine agent.
   378  func (a *MachineAgent) Run(*cmd.Context) error {
   380  	defer a.tomb.Done()
   381  	if err := a.ReadConfig(a.Tag().String()); err != nil {
   382  		return fmt.Errorf("cannot read agent configuration: %v", err)
   383  	}
   385  	logger.Infof("machine agent %v start (%s [%s])", a.Tag(), jujuversion.Current, runtime.Compiler)
   386  	if flags := featureflag.String(); flags != "" {
   387  		logger.Warningf("developer feature flags enabled: %s", flags)
   388  	}
   390  	// Before doing anything else, we need to make sure the certificate generated for
   391  	// use by mongo to validate controller connections is correct. This needs to be done
   392  	// before any possible restart of the mongo service.
   393  	// See bug
   394  	if err := a.upgradeCertificateDNSNames(); err != nil {
   395  		return errors.Annotate(err, "error upgrading server certificate")
   396  	}
   398  	if upgradeComplete, err := upgradesteps.NewLock(a); err != nil {
   399  		return errors.Annotate(err, "error during creating upgrade completion channel")
   400  	} else {
   401  		a.upgradeComplete = upgradeComplete
   402  	}
   404  	agentConfig := a.CurrentConfig()
   405  	createEngine := a.makeEngineCreator(agentConfig.UpgradedToVersion())
   406  	network.SetPreferIPv6(agentConfig.PreferIPv6())
   407  	charmrepo.CacheDir = filepath.Join(agentConfig.DataDir(), "charmcache")
   408  	if err := a.createJujudSymlinks(agentConfig.DataDir()); err != nil {
   409  		return err
   410  	}
   411  	a.runner.StartWorker("engine", createEngine)
   413  	// At this point, all workers will have been configured to start
   414  	close(a.workersStarted)
   415  	err := a.runner.Wait()
   416  	switch errors.Cause(err) {
   417  	case worker.ErrTerminateAgent:
   418  		err = a.uninstallAgent()
   419  	case worker.ErrRebootMachine:
   420  		logger.Infof("Caught reboot error")
   421  		err = a.executeRebootOrShutdown(params.ShouldReboot)
   422  	case worker.ErrShutdownMachine:
   423  		logger.Infof("Caught shutdown error")
   424  		err = a.executeRebootOrShutdown(params.ShouldShutdown)
   425  	}
   426  	err = cmdutil.AgentDone(logger, err)
   427  	a.tomb.Kill(err)
   428  	return err
   429  }
   431  func (a *MachineAgent) makeEngineCreator(previousAgentVersion version.Number) func() (worker.Worker, error) {
   432  	return func() (worker.Worker, error) {
   433  		config := dependency.EngineConfig{
   434  			IsFatal:     cmdutil.IsFatal,
   435  			WorstError:  cmdutil.MoreImportantError,
   436  			ErrorDelay:  3 * time.Second,
   437  			BounceDelay: 10 * time.Millisecond,
   438  		}
   439  		engine, err := dependency.NewEngine(config)
   440  		if err != nil {
   441  			return nil, err
   442  		}
   443  		manifolds := machine.Manifolds(machine.ManifoldsConfig{
   444  			PreviousAgentVersion: previousAgentVersion,
   445  			Agent:                agent.APIHostPortsSetter{Agent: a},
   446  			RootDir:              a.rootDir,
   447  			AgentConfigChanged:   a.configChangedVal,
   448  			UpgradeStepsLock:     a.upgradeComplete,
   449  			UpgradeCheckLock:     a.initialUpgradeCheckComplete,
   450  			OpenState:            a.initState,
   451  			OpenStateForUpgrade:  a.openStateForUpgrade,
   452  			StartStateWorkers:    a.startStateWorkers,
   453  			StartAPIWorkers:      a.startAPIWorkers,
   454  			PreUpgradeSteps:      upgrades.PreUpgradeSteps,
   455  			LogSource:            a.bufferedLogs,
   456  			NewDeployContext:     newDeployContext,
   457  			Clock:                clock.WallClock,
   458  		})
   459  		if err := dependency.Install(engine, manifolds); err != nil {
   460  			if err := worker.Stop(engine); err != nil {
   461  				logger.Errorf("while stopping engine with bad manifolds: %v", err)
   462  			}
   463  			return nil, err
   464  		}
   465  		return engine, nil
   466  	}
   467  }
   469  func (a *MachineAgent) executeRebootOrShutdown(action params.RebootAction) error {
   470  	// At this stage, all API connections would have been closed
   471  	// We need to reopen the API to clear the reboot flag after
   472  	// scheduling the reboot. It may be cleaner to do this in the reboot
   473  	// worker, before returning the ErrRebootMachine.
   474  	conn, err := apicaller.OnlyConnect(a, apicaller.APIOpen)
   475  	if err != nil {
   476  		logger.Infof("Reboot: Error connecting to state")
   477  		return errors.Trace(err)
   478  	}
   480  	// block until all units/containers are ready, and reboot/shutdown
   481  	finalize, err := reboot.NewRebootWaiter(conn, a.CurrentConfig())
   482  	if err != nil {
   483  		return errors.Trace(err)
   484  	}
   486  	logger.Infof("Reboot: Executing reboot")
   487  	err = finalize.ExecuteReboot(action)
   488  	if err != nil {
   489  		logger.Infof("Reboot: Error executing reboot: %v", err)
   490  		return errors.Trace(err)
   491  	}
   492  	// On windows, the shutdown command is asynchronous. We return ErrRebootMachine
   493  	// so the agent will simply exit without error pending reboot/shutdown.
   494  	return worker.ErrRebootMachine
   495  }
   497  func (a *MachineAgent) ChangeConfig(mutate agent.ConfigMutator) error {
   498  	err := a.AgentConfigWriter.ChangeConfig(mutate)
   499  	a.configChangedVal.Set(true)
   500  	return errors.Trace(err)
   501  }
   503  func (a *MachineAgent) maybeStopMongo(ver mongo.Version, isMaster bool) error {
   504  	if !a.mongoInitialized {
   505  		return nil
   506  	}
   508  	conf := a.AgentConfigWriter.CurrentConfig()
   509  	v := conf.MongoVersion()
   511  	logger.Errorf("Got version change %v", ver)
   512  	// TODO(perrito666) replace with "read-only" mode for environment when
   513  	// it is available.
   514  	if ver.NewerThan(v) > 0 {
   515  		err := a.AgentConfigWriter.ChangeConfig(func(config agent.ConfigSetter) error {
   516  			config.SetMongoVersion(mongo.MongoUpgrade)
   517  			return nil
   518  		})
   519  		if err != nil {
   520  			return err
   521  		}
   523  	}
   524  	return nil
   526  }
   528  // PrepareRestore will flag the agent to allow only a limited set
   529  // of commands defined in
   530  // "".allowedMethodsAboutToRestore
   531  // the most noteworthy is:
   532  // Backups.Restore: this will ensure that we can do all the file movements
   533  // required for restore and no one will do changes while we do that.
   534  // it will return error if the machine is already in this state.
   535  func (a *MachineAgent) PrepareRestore() error {
   536  	if a.restoreMode {
   537  		return errors.Errorf("already in restore mode")
   538  	}
   539  	a.restoreMode = true
   540  	return nil
   541  }
   543  // BeginRestore will flag the agent to disallow all commands since
   544  // restore should be running and therefore making changes that
   545  // would override anything done.
   546  func (a *MachineAgent) BeginRestore() error {
   547  	switch {
   548  	case !a.restoreMode:
   549  		return errors.Errorf("not in restore mode, cannot begin restoration")
   550  	case a.restoring:
   551  		return errors.Errorf("already restoring")
   552  	}
   553  	a.restoring = true
   554  	return nil
   555  }
   557  // EndRestore will flag the agent to allow all commands
   558  // This being invoked means that restore process failed
   559  // since success restarts the agent.
   560  func (a *MachineAgent) EndRestore() {
   561  	a.restoreMode = false
   562  	a.restoring = false
   563  }
   565  // newRestoreStateWatcherWorker will return a worker or err if there
   566  // is a failure, the worker takes care of watching the state of
   567  // restoreInfo doc and put the agent in the different restore modes.
   568  func (a *MachineAgent) newRestoreStateWatcherWorker(st *state.State) (worker.Worker, error) {
   569  	rWorker := func(stopch <-chan struct{}) error {
   570  		return a.restoreStateWatcher(st, stopch)
   571  	}
   572  	return worker.NewSimpleWorker(rWorker), nil
   573  }
   575  // restoreChanged will be called whenever restoreInfo doc changes signaling a new
   576  // step in the restore process.
   577  func (a *MachineAgent) restoreChanged(st *state.State) error {
   578  	rinfo, err := st.RestoreInfoSetter()
   579  	if err != nil {
   580  		return errors.Annotate(err, "cannot read restore state")
   581  	}
   582  	switch rinfo.Status() {
   583  	case state.RestorePending:
   584  		a.PrepareRestore()
   585  	case state.RestoreInProgress:
   586  		a.BeginRestore()
   587  	case state.RestoreFailed:
   588  		a.EndRestore()
   589  	}
   590  	return nil
   591  }
   593  // restoreStateWatcher watches for restoreInfo looking for changes in the restore process.
   594  func (a *MachineAgent) restoreStateWatcher(st *state.State, stopch <-chan struct{}) error {
   595  	restoreWatch := st.WatchRestoreInfoChanges()
   596  	defer func() {
   597  		restoreWatch.Kill()
   598  		restoreWatch.Wait()
   599  	}()
   601  	for {
   602  		select {
   603  		case <-restoreWatch.Changes():
   604  			if err := a.restoreChanged(st); err != nil {
   605  				return err
   606  			}
   607  		case <-stopch:
   608  			return nil
   609  		}
   610  	}
   611  }
// newEnvirons is the constructor startAPIWorkers uses to build an
// environ from the model config. It is a variable, presumably so tests
// can replace it.
var newEnvirons = environs.New
   615  // startAPIWorkers is called to start workers which rely on the
   616  // machine agent's API connection (via the apiworkers manifold). It
   617  // returns a Runner with a number of workers attached to it.
   618  //
   619  // The workers started here need to be converted to run under the
   620  // dependency engine. Once they have all been converted, this method -
   621  // and the apiworkers manifold - can be removed.
   622  func (a *MachineAgent) startAPIWorkers(apiConn api.Connection) (_ worker.Worker, outErr error) {
   623  	agentConfig := a.CurrentConfig()
   625  	entity, err := apiagent.NewState(apiConn).Entity(a.Tag())
   626  	if err != nil {
   627  		return nil, errors.Trace(err)
   628  	}
   630  	var isModelManager bool
   631  	for _, job := range entity.Jobs() {
   632  		switch job {
   633  		case multiwatcher.JobManageModel:
   634  			isModelManager = true
   635  		default:
   636  			// TODO(dimitern): Once all workers moved over to using
   637  			// the API, report "unknown job type" here.
   638  		}
   639  	}
   641  	runner := newConnRunner(apiConn)
   642  	defer func() {
   643  		// If startAPIWorkers exits early with an error, stop the
   644  		// runner so that any already started runners aren't leaked.
   645  		if outErr != nil {
   646  			worker.Stop(runner)
   647  		}
   648  	}()
   650  	modelConfig, err := apiagent.NewState(apiConn).ModelConfig()
   651  	if err != nil {
   652  		return nil, fmt.Errorf("cannot read model config: %v", err)
   653  	}
   655  	// Perform the operations needed to set up hosting for containers.
   656  	if err := a.setupContainerSupport(runner, apiConn, agentConfig); err != nil {
   657  		cause := errors.Cause(err)
   658  		if params.IsCodeDead(cause) || cause == worker.ErrTerminateAgent {
   659  			return nil, worker.ErrTerminateAgent
   660  		}
   661  		return nil, fmt.Errorf("setting up container support: %v", err)
   662  	}
   664  	if isModelManager {
   666  		// Published image metadata for some providers are in simple streams.
   667  		// Providers that do not depend on simple streams do not need this worker.
   668  		env, err := newEnvirons(modelConfig)
   669  		if err != nil {
   670  			return nil, errors.Annotate(err, "getting environ")
   671  		}
   672  		if _, ok := env.(simplestreams.HasRegion); ok {
   673  			// Start worker that stores published image metadata in state.
   674  			runner.StartWorker("imagemetadata", func() (worker.Worker, error) {
   675  				return newMetadataUpdater(apiConn.MetadataUpdater()), nil
   676  			})
   677  		}
   679  		// We don't have instance info set and the network config for the
   680  		// bootstrap machine only, so update it now. All the other machines will
   681  		// have instance info including network config set at provisioning time.
   682  		if err := a.setControllerNetworkConfig(apiConn); err != nil {
   683  			return nil, errors.Annotate(err, "setting controller network config")
   684  		}
   685  	} else {
   686  		runner.StartWorker("stateconverter", func() (worker.Worker, error) {
   687  			// TODO(fwereade): this worker needs its own facade.
   688  			facade := apimachiner.NewState(apiConn)
   689  			handler := conv2state.New(facade, a)
   690  			w, err := watcher.NewNotifyWorker(watcher.NotifyConfig{
   691  				Handler: handler,
   692  			})
   693  			if err != nil {
   694  				return nil, errors.Annotate(err, "cannot start controller promoter worker")
   695  			}
   696  			return w, nil
   697  		})
   698  	}
   699  	return runner, nil
   700  }
   702  func (a *MachineAgent) setControllerNetworkConfig(apiConn api.Connection) error {
   703  	machinerAPI := apimachiner.NewState(apiConn)
   704  	agentConfig := a.CurrentConfig()
   706  	tag := agentConfig.Tag().(names.MachineTag)
   707  	machine, err := machinerAPI.Machine(tag)
   708  	if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
   709  		return worker.ErrTerminateAgent
   710  	}
   711  	if err != nil {
   712  		return errors.Annotatef(err, "cannot load machine %s from state", tag)
   713  	}
   715  	if err := machine.SetProviderNetworkConfig(); err != nil {
   716  		return errors.Annotate(err, "cannot set controller provider network config")
   717  	}
   718  	return nil
   719  }
   721  // Restart restarts the agent's service.
   722  func (a *MachineAgent) Restart() error {
   723  	name := a.CurrentConfig().Value(agent.AgentServiceName)
   724  	return service.Restart(name)
   725  }
   727  // openStateForUpgrade exists to be passed into the upgradesteps
   728  // worker. The upgradesteps worker opens state independently of the
   729  // state worker so that it isn't affected by the state worker's
   730  // lifetime. It ensures the MongoDB server is configured and started,
   731  // and then opens a state connection.
   732  //
   733  // TODO(mjs)- review the need for this once the dependency engine is
   734  // in use. Why can't upgradesteps depend on the main state connection?
   735  func (a *MachineAgent) openStateForUpgrade() (*state.State, error) {
   736  	agentConfig := a.CurrentConfig()
   737  	if err := a.ensureMongoServer(agentConfig); err != nil {
   738  		return nil, errors.Trace(err)
   739  	}
   740  	info, ok := agentConfig.MongoInfo()
   741  	if !ok {
   742  		return nil, errors.New("no state info available")
   743  	}
   744  	st, err := state.Open(agentConfig.Model(), info, mongo.DefaultDialOpts(), environs.NewStatePolicy())
   745  	if err != nil {
   746  		return nil, errors.Trace(err)
   747  	}
   748  	return st, nil
   749  }
   751  // setupContainerSupport determines what containers can be run on this machine and
   752  // initialises suitable infrastructure to support such containers.
   753  func (a *MachineAgent) setupContainerSupport(runner worker.Runner, st api.Connection, agentConfig agent.Config) error {
   754  	var supportedContainers []instance.ContainerType
   755  	supportsContainers := container.ContainersSupported()
   756  	if supportsContainers {
   757  		supportedContainers = append(supportedContainers, instance.LXC, instance.LXD)
   758  	}
   760  	supportsKvm, err := kvm.IsKVMSupported()
   761  	if err != nil {
   762  		logger.Warningf("determining kvm support: %v\nno kvm containers possible", err)
   763  	}
   764  	if err == nil && supportsKvm {
   765  		supportedContainers = append(supportedContainers, instance.KVM)
   766  	}
   768  	return a.updateSupportedContainers(runner, st, supportedContainers, agentConfig)
   769  }
   771  // updateSupportedContainers records in state that a machine can run the specified containers.
   772  // It starts a watcher and when a container of a given type is first added to the machine,
   773  // the watcher is killed, the machine is set up to be able to start containers of the given type,
   774  // and a suitable provisioner is started.
   775  func (a *MachineAgent) updateSupportedContainers(
   776  	runner worker.Runner,
   777  	st api.Connection,
   778  	containers []instance.ContainerType,
   779  	agentConfig agent.Config,
   780  ) error {
   781  	pr := st.Provisioner()
   782  	tag := agentConfig.Tag().(names.MachineTag)
   783  	machine, err := pr.Machine(tag)
   784  	if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
   785  		return worker.ErrTerminateAgent
   786  	}
   787  	if err != nil {
   788  		return errors.Annotatef(err, "cannot load machine %s from state", tag)
   789  	}
   790  	if len(containers) == 0 {
   791  		if err := machine.SupportsNoContainers(); err != nil {
   792  			return errors.Annotatef(err, "clearing supported containers for %s", tag)
   793  		}
   794  		return nil
   795  	}
   796  	if err := machine.SetSupportedContainers(containers...); err != nil {
   797  		return errors.Annotatef(err, "setting supported containers for %s", tag)
   798  	}
   799  	initLock, err := cmdutil.HookExecutionLock(agentConfig.DataDir())
   800  	if err != nil {
   801  		return err
   802  	}
   803  	// Start the watcher to fire when a container is first requested on the machine.
   804  	modelUUID, err := st.ModelTag()
   805  	if err != nil {
   806  		return err
   807  	}
   808  	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
   809  	// There may not be a CA certificate private key available, and without
   810  	// it we can't ensure that other Juju nodes can connect securely, so only
   811  	// use an image URL getter if there's a private key.
   812  	var imageURLGetter container.ImageURLGetter
   813  	if agentConfig.Value(agent.AllowsSecureConnection) == "true" {
   814  		cfg, err := pr.ModelConfig()
   815  		if err != nil {
   816  			return errors.Annotate(err, "unable to get environ config")
   817  		}
   818  		imageURLGetter = container.NewImageURLGetter(
   819  			// Explicitly call the non-named constructor so if anyone
   820  			// adds additional fields, this fails.
   821  			container.ImageURLGetterConfig{
   822  				ServerRoot:        st.Addr(),
   823  				ModelUUID:         modelUUID.Id(),
   824  				CACert:            []byte(agentConfig.CACert()),
   825  				CloudimgBaseUrl:   cfg.CloudImageBaseURL(),
   826  				Stream:            cfg.ImageStream(),
   827  				ImageDownloadFunc: container.ImageDownloadURL,
   828  			})
   829  	}
   830  	params := provisioner.ContainerSetupParams{
   831  		Runner:              runner,
   832  		WorkerName:          watcherName,
   833  		SupportedContainers: containers,
   834  		ImageURLGetter:      imageURLGetter,
   835  		Machine:             machine,
   836  		Provisioner:         pr,
   837  		Config:              agentConfig,
   838  		InitLock:            initLock,
   839  	}
   840  	handler := provisioner.NewContainerSetupHandler(params)
   841  	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
   842  		w, err := watcher.NewStringsWorker(watcher.StringsConfig{
   843  			Handler: handler,
   844  		})
   845  		if err != nil {
   846  			return nil, errors.Annotatef(err, "cannot start %s worker", watcherName)
   847  		}
   848  		return w, nil
   849  	})
   850  	return nil
   851  }
   853  func (a *MachineAgent) initState(agentConfig agent.Config) (*state.State, error) {
   854  	// Start MongoDB server and dial.
   855  	if err := a.ensureMongoServer(agentConfig); err != nil {
   856  		return nil, err
   857  	}
   859  	st, _, err := openState(agentConfig, stateWorkerDialOpts)
   860  	if err != nil {
   861  		return nil, err
   862  	}
   864  	reportOpenedState(st)
   866  	return st, nil
   867  }
   869  // startStateWorkers returns a worker running all the workers that
   870  // require a *state.State connection.
   871  func (a *MachineAgent) startStateWorkers(st *state.State) (worker.Worker, error) {
   872  	agentConfig := a.CurrentConfig()
   874  	m, err := getMachine(st, agentConfig.Tag())
   875  	if err != nil {
   876  		return nil, errors.Annotate(err, "machine lookup")
   877  	}
   879  	runner := newConnRunner(st)
   880  	singularRunner, err := newSingularStateRunner(runner, st, m)
   881  	if err != nil {
   882  		return nil, errors.Trace(err)
   883  	}
   885  	for _, job := range m.Jobs() {
   886  		switch job {
   887  		case state.JobHostUnits:
   888  			// Implemented elsewhere with workers that use the API.
   889  		case state.JobManageNetworking:
   890  			// Not used by state workers.
   891  		case state.JobManageModel:
   892  			useMultipleCPUs()
   893  			a.startWorkerAfterUpgrade(runner, "model worker manager", func() (worker.Worker, error) {
   894  				w, err := modelworkermanager.New(modelworkermanager.Config{
   895  					Backend:    st,
   896  					NewWorker:  a.startModelWorkers,
   897  					ErrorDelay: worker.RestartDelay,
   898  				})
   899  				if err != nil {
   900  					return nil, errors.Annotate(err, "cannot start model worker manager")
   901  				}
   902  				return w, nil
   903  			})
   904  			a.startWorkerAfterUpgrade(runner, "peergrouper", func() (worker.Worker, error) {
   905  				w, err := peergrouperNew(st)
   906  				if err != nil {
   907  					return nil, errors.Annotate(err, "cannot start peergrouper worker")
   908  				}
   909  				return w, nil
   910  			})
   911  			a.startWorkerAfterUpgrade(runner, "restore", func() (worker.Worker, error) {
   912  				w, err := a.newRestoreStateWatcherWorker(st)
   913  				if err != nil {
   914  					return nil, errors.Annotate(err, "cannot start backup-restorer worker")
   915  				}
   916  				return w, nil
   917  			})
   918  			a.startWorkerAfterUpgrade(runner, "mongoupgrade", func() (worker.Worker, error) {
   919  				return newUpgradeMongoWorker(st, a.machineId, a.maybeStopMongo)
   920  			})
   922  			// certChangedChan is shared by multiple workers it's up
   923  			// to the agent to close it rather than any one of the
   924  			// workers.  It is possible that multiple cert changes
   925  			// come in before the apiserver is up to receive them.
   926  			// Specify a bigger buffer to prevent deadlock when
   927  			// the apiserver isn't up yet.  Use a size of 10 since we
   928  			// allow up to 7 controllers, and might also update the
   929  			// addresses of the local machine (, ::1, etc).
   930  			//
   931  			// TODO(cherylj/waigani) Remove this workaround when
   932  			// certupdater and apiserver can properly manage dependencies
   933  			// through the dependency engine.
   934  			//
   935  			// TODO(ericsnow) For now we simply do not close the channel.
   936  			certChangedChan := make(chan params.StateServingInfo, 10)
   937  			// Each time apiserver worker is restarted, we need a fresh copy of state due
   938  			// to the fact that state holds lease managers which are killed and need to be reset.
   939  			stateOpener := func() (*state.State, error) {
   940  				logger.Debugf("opening state for apiserver worker")
   941  				st, _, err := openState(agentConfig, stateWorkerDialOpts)
   942  				return st, err
   943  			}
   944  			runner.StartWorker("apiserver", a.apiserverWorkerStarter(stateOpener, certChangedChan))
   945  			var stateServingSetter certupdater.StateServingInfoSetter = func(info params.StateServingInfo, done <-chan struct{}) error {
   946  				return a.ChangeConfig(func(config agent.ConfigSetter) error {
   947  					config.SetStateServingInfo(info)
   948  					logger.Infof("update apiserver worker with new certificate")
   949  					select {
   950  					case certChangedChan <- info:
   951  						return nil
   952  					case <-done:
   953  						return nil
   954  					}
   955  				})
   956  			}
   957  			a.startWorkerAfterUpgrade(runner, "certupdater", func() (worker.Worker, error) {
   958  				return newCertificateUpdater(m, agentConfig, st, st, stateServingSetter), nil
   959  			})
   961  			a.startWorkerAfterUpgrade(singularRunner, "dblogpruner", func() (worker.Worker, error) {
   962  				return dblogpruner.New(st, dblogpruner.NewLogPruneParams()), nil
   963  			})
   965  			a.startWorkerAfterUpgrade(singularRunner, "txnpruner", func() (worker.Worker, error) {
   966  				return txnpruner.New(st, time.Hour*2), nil
   967  			})
   968  		default:
   969  			return nil, errors.Errorf("unknown job type %q", job)
   970  		}
   971  	}
   972  	return runner, nil
   973  }
   975  // startModelWorkers starts the set of workers that run for every model
   976  // in each controller.
   977  func (a *MachineAgent) startModelWorkers(uuid string) (worker.Worker, error) {
   978  	modelAgent, err := model.WrapAgent(a, uuid)
   979  	if err != nil {
   980  		return nil, errors.Trace(err)
   981  	}
   983  	engine, err := dependency.NewEngine(dependency.EngineConfig{
   984  		IsFatal:     model.IsFatal,
   985  		WorstError:  model.WorstError,
   986  		Filter:      model.IgnoreErrRemoved,
   987  		ErrorDelay:  3 * time.Second,
   988  		BounceDelay: 10 * time.Millisecond,
   989  	})
   990  	if err != nil {
   991  		return nil, errors.Trace(err)
   992  	}
   994  	manifolds := modelManifolds(model.ManifoldsConfig{
   995  		Agent:                       modelAgent,
   996  		AgentConfigChanged:          a.configChangedVal,
   997  		Clock:                       clock.WallClock,
   998  		RunFlagDuration:             time.Minute,
   999  		CharmRevisionUpdateInterval: 24 * time.Hour,
  1000  		EntityStatusHistoryCount:    100,
  1001  		EntityStatusHistoryInterval: 5 * time.Minute,
  1002  		SpacesImportedGate:          a.discoverSpacesComplete,
  1003  	})
  1004  	if err := dependency.Install(engine, manifolds); err != nil {
  1005  		if err := worker.Stop(engine); err != nil {
  1006  			logger.Errorf("while stopping engine with bad manifolds: %v", err)
  1007  		}
  1008  		return nil, errors.Trace(err)
  1009  	}
  1010  	return engine, nil
  1011  }
// stateWorkerDialOpts is a mongo.DialOpts suitable
// for use by StateWorker to dial mongo. It is the set of dial options
// passed to openState whenever this file opens a state connection.
//
// It is declared without an initializer here, so it holds the zero value
// until something assigns it (presumably package initialisation
// elsewhere - TODO confirm).
//
// This must be overridden in tests, as it assumes
// journaling is enabled.
var stateWorkerDialOpts mongo.DialOpts
  1020  func (a *MachineAgent) apiserverWorkerStarter(
  1021  	stateOpener func() (*state.State, error), certChanged chan params.StateServingInfo,
  1022  ) func() (worker.Worker, error) {
  1023  	return func() (worker.Worker, error) {
  1024  		st, err := stateOpener()
  1025  		if err != nil {
  1026  			return nil, errors.Trace(err)
  1027  		}
  1028  		return a.newApiserverWorker(st, certChanged)
  1029  	}
  1030  }
  1032  func (a *MachineAgent) newApiserverWorker(st *state.State, certChanged chan params.StateServingInfo) (worker.Worker, error) {
  1033  	agentConfig := a.CurrentConfig()
  1034  	// If the configuration does not have the required information,
  1035  	// it is currently not a recoverable error, so we kill the whole
  1036  	// agent, potentially enabling human intervention to fix
  1037  	// the agent's configuration file.
  1038  	info, ok := agentConfig.StateServingInfo()
  1039  	if !ok {
  1040  		return nil, &cmdutil.FatalError{"StateServingInfo not available and we need it"}
  1041  	}
  1042  	cert := []byte(info.Cert)
  1043  	key := []byte(info.PrivateKey)
  1045  	if len(cert) == 0 || len(key) == 0 {
  1046  		return nil, &cmdutil.FatalError{"configuration does not have controller cert/key"}
  1047  	}
  1048  	tag := agentConfig.Tag()
  1049  	dataDir := agentConfig.DataDir()
  1050  	logDir := agentConfig.LogDir()
  1052  	endpoint := net.JoinHostPort("", strconv.Itoa(info.APIPort))
  1053  	listener, err := net.Listen("tcp", endpoint)
  1054  	if err != nil {
  1055  		return nil, err
  1056  	}
  1057  	w, err := apiserver.NewServer(st, listener, apiserver.ServerConfig{
  1058  		Cert:        cert,
  1059  		Key:         key,
  1060  		Tag:         tag,
  1061  		DataDir:     dataDir,
  1062  		LogDir:      logDir,
  1063  		Validator:   a.limitLogins,
  1064  		CertChanged: certChanged,
  1065  	})
  1066  	if err != nil {
  1067  		return nil, errors.Annotate(err, "cannot start api server worker")
  1068  	}
  1069  	return w, nil
  1070  }
  1072  // limitLogins is called by the API server for each login attempt.
  1073  // it returns an error if upgrades or restore are running.
  1074  func (a *MachineAgent) limitLogins(req params.LoginRequest) error {
  1075  	if err := a.limitLoginsDuringRestore(req); err != nil {
  1076  		return err
  1077  	}
  1078  	if err := a.limitLoginsDuringUpgrade(req); err != nil {
  1079  		return err
  1080  	}
  1081  	return a.limitLoginsDuringMongoUpgrade(req)
  1082  }
  1084  func (a *MachineAgent) limitLoginsDuringMongoUpgrade(req params.LoginRequest) error {
  1085  	// If upgrade is running we will not be able to lock AgentConfigWriter
  1086  	// and it also means we are not upgrading mongo.
  1087  	if a.isUpgradeRunning() {
  1088  		return nil
  1089  	}
  1090  	cfg := a.AgentConfigWriter.CurrentConfig()
  1091  	ver := cfg.MongoVersion()
  1092  	if ver == mongo.MongoUpgrade {
  1093  		return errors.New("Upgrading Mongo")
  1094  	}
  1095  	return nil
  1096  }
  1098  // limitLoginsDuringRestore will only allow logins for restore related purposes
  1099  // while the different steps of restore are running.
  1100  func (a *MachineAgent) limitLoginsDuringRestore(req params.LoginRequest) error {
  1101  	var err error
  1102  	switch {
  1103  	case a.IsRestoreRunning():
  1104  		err = apiserver.RestoreInProgressError
  1105  	case a.IsRestorePreparing():
  1106  		err = apiserver.AboutToRestoreError
  1107  	}
  1108  	if err != nil {
  1109  		authTag, parseErr := names.ParseTag(req.AuthTag)
  1110  		if parseErr != nil {
  1111  			return errors.Annotate(err, "could not parse auth tag")
  1112  		}
  1113  		switch authTag := authTag.(type) {
  1114  		case names.UserTag:
  1115  			// use a restricted API mode
  1116  			return err
  1117  		case names.MachineTag:
  1118  			if authTag == a.Tag() {
  1119  				// allow logins from the local machine
  1120  				return nil
  1121  			}
  1122  		}
  1123  		return errors.Errorf("login for %q blocked because restore is in progress", authTag)
  1124  	}
  1125  	return nil
  1126  }
  1128  // limitLoginsDuringUpgrade is called by the API server for each login
  1129  // attempt. It returns an error if upgrades are in progress unless the
  1130  // login is for a user (i.e. a client) or the local machine.
  1131  func (a *MachineAgent) limitLoginsDuringUpgrade(req params.LoginRequest) error {
  1132  	if a.isUpgradeRunning() || a.isInitialUpgradeCheckPending() {
  1133  		authTag, err := names.ParseTag(req.AuthTag)
  1134  		if err != nil {
  1135  			return errors.Annotate(err, "could not parse auth tag")
  1136  		}
  1137  		switch authTag := authTag.(type) {
  1138  		case names.UserTag:
  1139  			// use a restricted API mode
  1140  			return params.UpgradeInProgressError
  1141  		case names.MachineTag:
  1142  			if authTag == a.Tag() {
  1143  				// allow logins from the local machine
  1144  				return nil
  1145  			}
  1146  		}
  1147  		return errors.Errorf("login for %q blocked because %s", authTag, params.CodeUpgradeInProgress)
  1148  	} else {
  1149  		return nil // allow all logins
  1150  	}
  1151  }
// stateWorkerServingConfigErr is the error value for a state worker that
// was started while no state serving info was available. It is not
// referenced in this part of the file.
var stateWorkerServingConfigErr = errors.New("state worker started with no state serving info")
  1155  // ensureMongoServer ensures that mongo is installed and running,
  1156  // and ready for opening a state connection.
  1157  func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) (err error) {
  1158  	a.mongoInitMutex.Lock()
  1159  	defer a.mongoInitMutex.Unlock()
  1160  	if a.mongoInitialized {
  1161  		logger.Debugf("mongo is already initialized")
  1162  		return nil
  1163  	}
  1164  	defer func() {
  1165  		if err == nil {
  1166  			a.mongoInitialized = true
  1167  		}
  1168  	}()
  1170  	mongoInstalled, err := mongo.IsServiceInstalled()
  1171  	if err != nil {
  1172  		return errors.Annotate(err, "error while checking if mongodb service is installed")
  1173  	}
  1175  	if !mongoInstalled {
  1176  		// EnsureMongoServer installs/upgrades the init config as necessary.
  1177  		ensureServerParams, err := cmdutil.NewEnsureServerParams(agentConfig)
  1178  		if err != nil {
  1179  			return err
  1180  		}
  1181  		if err := cmdutil.EnsureMongoServer(ensureServerParams); err != nil {
  1182  			return err
  1183  		}
  1184  	}
  1185  	logger.Debugf("mongodb service is installed")
  1187  	// Mongo is installed, record the version.
  1188  	err = a.ChangeConfig(func(config agent.ConfigSetter) error {
  1189  		config.SetMongoVersion(mongo.InstalledVersion())
  1190  		return nil
  1191  	})
  1192  	if err != nil {
  1193  		return errors.Annotate(err, "cannot set mongo version")
  1194  	}
  1195  	return nil
  1196  }
  1198  func openState(agentConfig agent.Config, dialOpts mongo.DialOpts) (_ *state.State, _ *state.Machine, err error) {
  1199  	info, ok := agentConfig.MongoInfo()
  1200  	if !ok {
  1201  		return nil, nil, fmt.Errorf("no state info available")
  1202  	}
  1203  	st, err := state.Open(agentConfig.Model(), info, dialOpts, environs.NewStatePolicy())
  1204  	if err != nil {
  1205  		return nil, nil, err
  1206  	}
  1207  	defer func() {
  1208  		if err != nil {
  1209  			st.Close()
  1210  		}
  1211  	}()
  1212  	m0, err := st.FindEntity(agentConfig.Tag())
  1213  	if err != nil {
  1214  		if errors.IsNotFound(err) {
  1215  			err = worker.ErrTerminateAgent
  1216  		}
  1217  		return nil, nil, err
  1218  	}
  1219  	m := m0.(*state.Machine)
  1220  	if m.Life() == state.Dead {
  1221  		return nil, nil, worker.ErrTerminateAgent
  1222  	}
  1223  	// Check the machine nonce as provisioned matches the agent.Conf value.
  1224  	if !m.CheckProvisioned(agentConfig.Nonce()) {
  1225  		// The agent is running on a different machine to the one it
  1226  		// should be according to state. It must stop immediately.
  1227  		logger.Errorf("running machine %v agent on inappropriate instance", m)
  1228  		return nil, nil, worker.ErrTerminateAgent
  1229  	}
  1230  	return st, m, nil
  1231  }
  1233  func getMachine(st *state.State, tag names.Tag) (*state.Machine, error) {
  1234  	m0, err := st.FindEntity(tag)
  1235  	if err != nil {
  1236  		return nil, err
  1237  	}
  1238  	return m0.(*state.Machine), nil
  1239  }
  1241  // startWorkerAfterUpgrade starts a worker to run the specified child worker
  1242  // but only after waiting for upgrades to complete.
  1243  func (a *MachineAgent) startWorkerAfterUpgrade(runner worker.Runner, name string, start func() (worker.Worker, error)) {
  1244  	runner.StartWorker(name, func() (worker.Worker, error) {
  1245  		return a.upgradeWaiterWorker(name, start), nil
  1246  	})
  1247  }
  1249  // upgradeWaiterWorker runs the specified worker after upgrades have completed.
  1250  func (a *MachineAgent) upgradeWaiterWorker(name string, start func() (worker.Worker, error)) worker.Worker {
  1251  	return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
  1252  		// Wait for the agent upgrade and upgrade steps to complete (or for us to be stopped).
  1253  		for _, ch := range []<-chan struct{}{
  1254  			a.upgradeComplete.Unlocked(),
  1255  			a.initialUpgradeCheckComplete.Unlocked(),
  1256  		} {
  1257  			select {
  1258  			case <-stop:
  1259  				return nil
  1260  			case <-ch:
  1261  			}
  1262  		}
  1263  		logger.Debugf("upgrades done, starting worker %q", name)
  1265  		// Upgrades are done, start the worker.
  1266  		worker, err := start()
  1267  		if err != nil {
  1268  			return err
  1269  		}
  1270  		// Wait for worker to finish or for us to be stopped.
  1271  		waitCh := make(chan error)
  1272  		go func() {
  1273  			waitCh <- worker.Wait()
  1274  		}()
  1275  		select {
  1276  		case err := <-waitCh:
  1277  			logger.Debugf("worker %q exited with %v", name, err)
  1278  			return err
  1279  		case <-stop:
  1280  			logger.Debugf("stopping so killing worker %q", name)
  1281  			worker.Kill()
  1282  		}
  1283  		return <-waitCh // Ensure worker has stopped before returning.
  1284  	})
  1285  }
  1287  // WorkersStarted returns a channel that's closed once all top level workers
  1288  // have been started. This is provided for testing purposes.
  1289  func (a *MachineAgent) WorkersStarted() <-chan struct{} {
  1290  	return a.workersStarted
  1291  }
  1293  func (a *MachineAgent) Tag() names.Tag {
  1294  	return names.NewMachineTag(a.machineId)
  1295  }
  1297  func (a *MachineAgent) createJujudSymlinks(dataDir string) error {
  1298  	jujud := filepath.Join(tools.ToolsDir(dataDir, a.Tag().String()), jujunames.Jujud)
  1299  	for _, link := range []string{jujuRun, jujuDumpLogs} {
  1300  		err := a.createSymlink(jujud, link)
  1301  		if err != nil {
  1302  			return errors.Annotatef(err, "failed to create %s symlink", link)
  1303  		}
  1304  	}
  1305  	return nil
  1306  }
  1308  func (a *MachineAgent) createSymlink(target, link string) error {
  1309  	fullLink := utils.EnsureBaseDir(a.rootDir, link)
  1311  	currentTarget, err := symlink.Read(fullLink)
  1312  	if err != nil && !os.IsNotExist(err) {
  1313  		return err
  1314  	} else if err == nil {
  1315  		// Link already in place - check it.
  1316  		if currentTarget == target {
  1317  			// Link already points to the right place - nothing to do.
  1318  			return nil
  1319  		}
  1320  		// Link points to the wrong place - delete it.
  1321  		if err := os.Remove(fullLink); err != nil {
  1322  			return err
  1323  		}
  1324  	}
  1326  	if err := os.MkdirAll(filepath.Dir(fullLink), os.FileMode(0755)); err != nil {
  1327  		return err
  1328  	}
  1329  	return symlink.New(target, fullLink)
  1330  }
  1332  func (a *MachineAgent) removeJujudSymlinks() (errs []error) {
  1333  	for _, link := range []string{jujuRun, jujuDumpLogs} {
  1334  		err := os.Remove(utils.EnsureBaseDir(a.rootDir, link))
  1335  		if err != nil && !os.IsNotExist(err) {
  1336  			errs = append(errs, errors.Annotatef(err, "failed to remove %s symlink", link))
  1337  		}
  1338  	}
  1339  	return
  1340  }
  1342  func (a *MachineAgent) uninstallAgent() error {
  1343  	// We should only uninstall if the uninstall file is present.
  1344  	if !agent.CanUninstall(a) {
  1345  		logger.Infof("ignoring uninstall request")
  1346  		return nil
  1347  	}
  1348  	logger.Infof("uninstalling agent")
  1350  	agentConfig := a.CurrentConfig()
  1351  	var errs []error
  1352  	agentServiceName := agentConfig.Value(agent.AgentServiceName)
  1353  	if agentServiceName == "" {
  1354  		// For backwards compatibility, handle lack of AgentServiceName.
  1355  		agentServiceName = os.Getenv("UPSTART_JOB")
  1356  	}
  1358  	if agentServiceName != "" {
  1359  		svc, err := service.DiscoverService(agentServiceName, common.Conf{})
  1360  		if err != nil {
  1361  			errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err))
  1362  		} else if err := svc.Remove(); err != nil {
  1363  			errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err))
  1364  		}
  1365  	}
  1367  	errs = append(errs, a.removeJujudSymlinks()...)
  1369  	// TODO(fwereade): surely this shouldn't be happening here? Once we're
  1370  	// at this point we should expect to be killed in short order; if this
  1371  	// work is remotely important we should be blocking machine death on
  1372  	// its completion.
  1373  	insideContainer := container.RunningInContainer()
  1374  	if insideContainer {
  1375  		// We're running inside LXC, so loop devices may leak. Detach
  1376  		// any loop devices that are backed by files on this machine.
  1377  		//
  1378  		// It is necessary to do this here as well as in container/lxc,
  1379  		// as container/lxc needs to check in the container's rootfs
  1380  		// to see if the loop device is attached to the container; that
  1381  		// will fail if the data-dir is removed first.
  1382  		if err := a.loopDeviceManager.DetachLoopDevices("/", agentConfig.DataDir()); err != nil {
  1383  			errs = append(errs, err)
  1384  		}
  1385  	}
  1387  	if err := mongo.RemoveService(); err != nil {
  1388  		errs = append(errs, errors.Annotate(err, "cannot stop/remove mongo service"))
  1389  	}
  1390  	if err := os.RemoveAll(agentConfig.DataDir()); err != nil {
  1391  		errs = append(errs, err)
  1392  	}
  1393  	if len(errs) == 0 {
  1394  		return nil
  1395  	}
  1396  	return fmt.Errorf("uninstall failed: %v", errs)
  1397  }
  1399  func newConnRunner(conns ...cmdutil.Pinger) worker.Runner {
  1400  	return worker.NewRunner(cmdutil.ConnectionIsFatal(logger, conns...), cmdutil.MoreImportant, worker.RestartDelay)
  1401  }
// MongoSessioner is implemented by anything that can hand out an
// *mgo.Session. newSingularStateRunner uses it to obtain the session for
// its singular connection.
type MongoSessioner interface {
	// MongoSession returns the mgo session of the implementation.
	MongoSession() *mgo.Session
}
  1407  func newSingularStateRunner(runner worker.Runner, st MongoSessioner, m *state.Machine) (worker.Runner, error) {
  1408  	singularStateConn := singularStateConn{st.MongoSession(), m}
  1409  	singularRunner, err := newSingularRunner(runner, singularStateConn)
  1410  	if err != nil {
  1411  		return nil, errors.Annotate(err, "cannot make singular State Runner")
  1412  	}
  1413  	return singularRunner, err
  1414  }
// singularStateConn implements singular.Conn on
// top of a State connection.
type singularStateConn struct {
	// session is the mongo session used for Ping and, together with
	// machine, for the IsMaster check.
	session *mgo.Session
	// machine is the machine passed to mongo.IsMaster by IsMaster.
	machine *state.Machine
}
  1423  func (c singularStateConn) IsMaster() (bool, error) {
  1424  	return mongo.IsMaster(c.session, c.machine)
  1425  }
  1427  func (c singularStateConn) Ping() error {
  1428  	return c.session.Ping()
  1429  }
  1431  func metricAPI(st api.Connection) (metricsmanager.MetricsManagerClient, error) {
  1432  	client, err := metricsmanager.NewClient(st)
  1433  	if err != nil {
  1434  		return nil, errors.Trace(err)
  1435  	}
  1436  	return client, nil
  1437  }
  1439  // newDeployContext gives the tests the opportunity to create a deployer.Context
  1440  // that can be used for testing so as to avoid (1) deploying units to the system
  1441  // running the tests and (2) get access to the *State used internally, so that
  1442  // tests can be run without waiting for the 5s watcher refresh time to which we would
  1443  // otherwise be restricted.
  1444  var newDeployContext = func(st *apideployer.State, agentConfig agent.Config) deployer.Context {
  1445  	return deployer.NewSimpleContext(agentConfig, st)
  1446  }