github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/cmd/jujud/agent/machine.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package agent

import (
	"fmt"
	"io"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"sync"
	"time"

	"github.com/juju/cmd"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names"
	"github.com/juju/replicaset"
	"github.com/juju/utils"
	"github.com/juju/utils/clock"
	"github.com/juju/utils/featureflag"
	"github.com/juju/utils/series"
	"github.com/juju/utils/set"
	"github.com/juju/utils/symlink"
	"github.com/juju/utils/voyeur"
	"gopkg.in/juju/charmrepo.v1"
	"gopkg.in/mgo.v2"
	"gopkg.in/natefinch/lumberjack.v2"
	"launchpad.net/gnuflag"
	"launchpad.net/tomb"

	"github.com/juju/juju/agent"
	"github.com/juju/juju/api"
	apiagent "github.com/juju/juju/api/agent"
	apideployer "github.com/juju/juju/api/deployer"
	"github.com/juju/juju/api/metricsmanager"
	apiupgrader "github.com/juju/juju/api/upgrader"
	"github.com/juju/juju/apiserver"
	"github.com/juju/juju/apiserver/params"
	"github.com/juju/juju/cert"
	"github.com/juju/juju/cmd/jujud/reboot"
	cmdutil "github.com/juju/juju/cmd/jujud/util"
	"github.com/juju/juju/container"
	"github.com/juju/juju/container/kvm"
	"github.com/juju/juju/container/lxc"
	"github.com/juju/juju/container/lxc/lxcutils"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/environs/config"
	"github.com/juju/juju/feature"
	"github.com/juju/juju/instance"
	jujunames "github.com/juju/juju/juju/names"
	"github.com/juju/juju/juju/paths"
	"github.com/juju/juju/mongo"
	"github.com/juju/juju/network"
	"github.com/juju/juju/provider"
	"github.com/juju/juju/service"
	"github.com/juju/juju/service/common"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/multiwatcher"
	statestorage "github.com/juju/juju/state/storage"
	"github.com/juju/juju/storage/looputil"
	"github.com/juju/juju/version"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/addresser"
	"github.com/juju/juju/worker/apiaddressupdater"
	"github.com/juju/juju/worker/apicaller"
	"github.com/juju/juju/worker/authenticationworker"
	"github.com/juju/juju/worker/certupdater"
	"github.com/juju/juju/worker/charmrevisionworker"
	"github.com/juju/juju/worker/cleaner"
	"github.com/juju/juju/worker/conv2state"
	"github.com/juju/juju/worker/dblogpruner"
	"github.com/juju/juju/worker/deployer"
	"github.com/juju/juju/worker/diskmanager"
	"github.com/juju/juju/worker/envworkermanager"
	"github.com/juju/juju/worker/firewaller"
	"github.com/juju/juju/worker/gate"
	"github.com/juju/juju/worker/imagemetadataworker"
	"github.com/juju/juju/worker/instancepoller"
	"github.com/juju/juju/worker/localstorage"
	workerlogger "github.com/juju/juju/worker/logger"
	"github.com/juju/juju/worker/logsender"
	"github.com/juju/juju/worker/machiner"
	"github.com/juju/juju/worker/metricworker"
	"github.com/juju/juju/worker/minunitsworker"
	"github.com/juju/juju/worker/networker"
	"github.com/juju/juju/worker/peergrouper"
	"github.com/juju/juju/worker/provisioner"
	"github.com/juju/juju/worker/proxyupdater"
	rebootworker "github.com/juju/juju/worker/reboot"
	"github.com/juju/juju/worker/resumer"
	"github.com/juju/juju/worker/rsyslog"
	"github.com/juju/juju/worker/singular"
	"github.com/juju/juju/worker/statushistorypruner"
	"github.com/juju/juju/worker/storageprovisioner"
	"github.com/juju/juju/worker/terminationworker"
	"github.com/juju/juju/worker/toolsversionchecker"
	"github.com/juju/juju/worker/txnpruner"
	"github.com/juju/juju/worker/upgrader"
)

const bootstrapMachineId = "0"

var (
	logger     = loggo.GetLogger("juju.cmd.jujud")
	retryDelay = 3 * time.Second
	JujuRun    = paths.MustSucceed(paths.JujuRun(series.HostSeries()))

	// The following are defined as variables to allow the tests to
	// intercept calls to the functions.
	useMultipleCPUs          = utils.UseMultipleCPUs
	maybeInitiateMongoServer = peergrouper.MaybeInitiateMongoServer
	ensureMongoAdminUser     = mongo.EnsureAdminUser
	newSingularRunner        = singular.New
	peergrouperNew           = peergrouper.New
	newMachiner              = machiner.NewMachiner
	newNetworker             = networker.NewNetworker
	newFirewaller            = firewaller.NewFirewaller
	newDiskManager           = diskmanager.NewWorker
	newStorageWorker         = storageprovisioner.NewStorageProvisioner
	newCertificateUpdater    = certupdater.NewCertificateUpdater
	newResumer               = resumer.NewResumer
	newInstancePoller        = instancepoller.NewWorker
	newCleaner               = cleaner.NewCleaner
	newAddresser             = addresser.NewWorker
	newMetadataUpdater       = imagemetadataworker.NewWorker
	reportOpenedState        = func(io.Closer) {}
	reportOpenedAPI          = func(io.Closer) {}
	getMetricAPI             = metricAPI
)
// ProductionMongoWriteConcern is a variable to override in tests;
// the default is true.
var ProductionMongoWriteConcern = true

func init() {
	stateWorkerDialOpts = mongo.DefaultDialOpts()
	stateWorkerDialOpts.PostDial = func(session *mgo.Session) error {
		safe := mgo.Safe{}
		if ProductionMongoWriteConcern {
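			// Wait for writes to reach the on-disk journal
			// before they are acknowledged.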
			safe.J = true
			_, err := replicaset.CurrentConfig(session)
			if err == nil {
				// Set mongo to write-majority (writes are only
				// acknowledged after being replicated to a majority
				// of replica-set members).
				safe.WMode = "majority"
			}
		}
		session.SetSafe(&safe)
		return nil
	}
}

// AgentInitializer handles initializing a type for use as a Jujud
// agent.
type AgentInitializer interface {
	AddFlags(*gnuflag.FlagSet)
	CheckArgs([]string) error
}

// AgentConfigWriter encapsulates disk I/O operations with the agent
// config.
type AgentConfigWriter interface {
	// ReadConfig reads the config for the given tag from disk.
	ReadConfig(tag string) error
	// ChangeConfig executes the given agent.ConfigMutator in a
	// thread-safe context.
	ChangeConfig(agent.ConfigMutator) error
	// CurrentConfig returns a copy of the in-memory agent config.
	CurrentConfig() agent.Config
}

// NewMachineAgentCmd creates a Command which handles parsing
// command-line arguments and instantiating and running a
// MachineAgent.
func NewMachineAgentCmd(
	ctx *cmd.Context,
	machineAgentFactory func(string) *MachineAgent,
	agentInitializer AgentInitializer,
	configFetcher AgentConfigWriter,
) cmd.Command {
	return &machineAgentCmd{
		ctx:                 ctx,
		machineAgentFactory: machineAgentFactory,
		agentInitializer:    agentInitializer,
		currentConfig:       configFetcher,
	}
}

type machineAgentCmd struct {
	cmd.CommandBase

	// This group of arguments is required.
	agentInitializer    AgentInitializer
	currentConfig       AgentConfigWriter
	machineAgentFactory func(string) *MachineAgent
	ctx                 *cmd.Context

	// This group is for debugging purposes.
	logToStdErr bool

	// The following are set via command-line flags.
	machineId string
}

// Init is called by the cmd system to initialize the structure for
// running.
func (a *machineAgentCmd) Init(args []string) error {

	if !names.IsValidMachine(a.machineId) {
		return fmt.Errorf("--machine-id option must be set, and expects a non-negative integer")
	}
	if err := a.agentInitializer.CheckArgs(args); err != nil {
		return err
	}

	// Because of changes in the logging, and the need to support old
	// environments that have been upgraded, we need to explicitly
	// remove the file writer if one has been added; otherwise all
	// logging will be duplicated in the log file.
	loggo.RemoveWriter("logfile")

	if a.logToStdErr {
		return nil
	}

	err := a.currentConfig.ReadConfig(names.NewMachineTag(a.machineId).String())
	if err != nil {
		return errors.Annotate(err, "cannot read agent configuration")
	}
	agentConfig := a.currentConfig.CurrentConfig()

	// The context's stderr is set as the loggo writer in github.com/juju/cmd/logging.go.
	a.ctx.Stderr = &lumberjack.Logger{
		Filename:   agent.LogFilename(agentConfig),
		MaxSize:    300, // megabytes
		MaxBackups: 2,
	}

	return nil
}

// Run instantiates a MachineAgent and runs it.
func (a *machineAgentCmd) Run(c *cmd.Context) error {
	machineAgent := a.machineAgentFactory(a.machineId)
	return machineAgent.Run(c)
}

// SetFlags adds the requisite flags to run this command.
func (a *machineAgentCmd) SetFlags(f *gnuflag.FlagSet) {
	a.agentInitializer.AddFlags(f)
	f.StringVar(&a.machineId, "machine-id", "", "id of the machine to run")
}

// Info returns usage information for the command.
func (a *machineAgentCmd) Info() *cmd.Info {
	return &cmd.Info{
		Name:    "machine",
		Purpose: "run a juju machine agent",
	}
}

// MachineAgentFactoryFn returns a function which instantiates a
// MachineAgent given a machineId.
func MachineAgentFactoryFn(
	agentConfWriter AgentConfigWriter,
	bufferedLogs logsender.LogRecordCh,
	loopDeviceManager looputil.LoopDeviceManager,
) func(string) *MachineAgent {
	return func(machineId string) *MachineAgent {
		return NewMachineAgent(
			machineId,
			agentConfWriter,
			bufferedLogs,
			NewUpgradeWorkerContext(),
			worker.NewRunner(cmdutil.IsFatal, cmdutil.MoreImportant),
			loopDeviceManager,
		)
	}
}

// NewMachineAgent instantiates a new MachineAgent.
func NewMachineAgent(
	machineId string,
	agentConfWriter AgentConfigWriter,
	bufferedLogs logsender.LogRecordCh,
	upgradeWorkerContext *upgradeWorkerContext,
	runner worker.Runner,
	loopDeviceManager looputil.LoopDeviceManager,
) *MachineAgent {
	return &MachineAgent{
		machineId:            machineId,
		AgentConfigWriter:    agentConfWriter,
		bufferedLogs:         bufferedLogs,
		upgradeWorkerContext: upgradeWorkerContext,
		workersStarted:       make(chan struct{}),
		runner:               runner,
		initialAgentUpgradeCheckComplete: make(chan struct{}),
		loopDeviceManager:                loopDeviceManager,
	}
}

// MachineAgent is responsible for tying together all functionality
// needed to orchestrate a Jujud instance which controls a machine.
type MachineAgent struct {
	AgentConfigWriter

	tomb                 tomb.Tomb
	machineId            string
	previousAgentVersion version.Number
	runner               worker.Runner
	bufferedLogs         logsender.LogRecordCh
	configChangedVal     voyeur.Value
	upgradeWorkerContext *upgradeWorkerContext
	workersStarted       chan struct{}

	// XXX(fwereade): these smell strongly of goroutine-unsafeness.
	restoreMode bool
	restoring   bool

	// Used to signal that the upgrade worker will not
	// reboot the agent on startup because there are no
	// longer any immediately pending agent upgrades.
	// Channel used as a selectable bool (closed means true).
	initialAgentUpgradeCheckComplete chan struct{}

	mongoInitMutex   sync.Mutex
	mongoInitialized bool

	loopDeviceManager looputil.LoopDeviceManager
}

// IsRestorePreparing reports whether we are in restore mode but not
// yet running the restore itself.
func (a *MachineAgent) IsRestorePreparing() bool {
	return a.restoreMode && !a.restoring
}

// IsRestoreRunning reports whether we are in restore mode and the
// actual restore process is running.
func (a *MachineAgent) IsRestoreRunning() bool {
	return a.restoring
}

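// isAgentUpgradePending reports whether the initial agent upgrade
// check has not yet completed; the channel acts as a selectable
// bool (closed means the check is done).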
func (a *MachineAgent) isAgentUpgradePending() bool {
	select {
	case <-a.initialAgentUpgradeCheckComplete:
		return false
	default:
		return true
	}
}

// Wait waits for the machine agent to finish.
func (a *MachineAgent) Wait() error {
	return a.tomb.Wait()
}

// Stop stops the machine agent.
func (a *MachineAgent) Stop() error {
	a.runner.Kill()
	return a.tomb.Wait()
}

// Dying returns the channel that can be used to see if the machine
// agent is terminating.
func (a *MachineAgent) Dying() <-chan struct{} {
	return a.tomb.Dying()
}

// upgradeCertificateDNSNames ensures that the state server certificate
// recorded in the agent config, and also in the mongo server.pem,
// contains the DNSNames entries required by Juju.
func (a *MachineAgent) upgradeCertificateDNSNames() error {
	agentConfig := a.CurrentConfig()
	si, ok := agentConfig.StateServingInfo()
	if !ok || si.CAPrivateKey == "" {
		// No certificate information exists yet, nothing to do.
		return nil
	}
	// Parse the current certificate to get the current dns names.
	serverCert, err := cert.ParseCert(si.Cert)
	if err != nil {
		return err
	}
	update := false
	dnsNames := set.NewStrings(serverCert.DNSNames...)
	requiredDNSNames := []string{"local", "juju-apiserver", "juju-mongodb"}
	for _, dnsName := range requiredDNSNames {
		if dnsNames.Contains(dnsName) {
			continue
		}
		dnsNames.Add(dnsName)
		update = true
	}
	if !update {
		return nil
	}
	// Write a new certificate to the mongo pem and agent config files.
	si.Cert, si.PrivateKey, err = cert.NewDefaultServer(agentConfig.CACert(), si.CAPrivateKey, dnsNames.Values())
	if err != nil {
		return err
	}
	if err := mongo.UpdateSSLKey(agentConfig.DataDir(), si.Cert, si.PrivateKey); err != nil {
		return err
	}
	return a.AgentConfigWriter.ChangeConfig(func(config agent.ConfigSetter) error {
		config.SetStateServingInfo(si)
		return nil
	})
}

// Run runs a machine agent.
func (a *MachineAgent) Run(*cmd.Context) error {

	defer a.tomb.Done()
	if err := a.ReadConfig(a.Tag().String()); err != nil {
		return fmt.Errorf("cannot read agent configuration: %v", err)
	}

	logger.Infof("machine agent %v start (%s [%s])", a.Tag(), version.Current, runtime.Compiler)
	if flags := featureflag.String(); flags != "" {
		logger.Warningf("developer feature flags enabled: %s", flags)
	}

	// Before doing anything else, we need to make sure the certificate generated for
	// use by mongo to validate state server connections is correct. This needs to be done
	// before any possible restart of the mongo service.
	// See bug http://pad.lv/1434680
	if err := a.upgradeCertificateDNSNames(); err != nil {
		return errors.Annotate(err, "error upgrading server certificate")
	}

	agentConfig := a.CurrentConfig()

	if err := a.upgradeWorkerContext.InitializeUsingAgent(a); err != nil {
		return errors.Annotate(err, "error during upgradeWorkerContext initialisation")
	}
	a.configChangedVal.Set(struct{}{})
	a.previousAgentVersion = agentConfig.UpgradedToVersion()

	network.InitializeFromConfig(agentConfig)
	charmrepo.CacheDir = filepath.Join(agentConfig.DataDir(), "charmcache")
	if err := a.createJujuRun(agentConfig.DataDir()); err != nil {
		return fmt.Errorf("cannot create juju run symlink: %v", err)
	}
	a.runner.StartWorker("api", a.APIWorker)
	a.runner.StartWorker("statestarter", a.newStateStarterWorker)
	a.runner.StartWorker("termination", func() (worker.Worker, error) {
		return terminationworker.NewWorker(), nil
	})

	// At this point, all workers will have been configured to start.
	close(a.workersStarted)
	err := a.runner.Wait()
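	// Certain worker errors are sentinels that tell the agent to
	// uninstall itself or to reboot or shut down the machine.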
	switch err {
	case worker.ErrTerminateAgent:
		err = a.uninstallAgent(agentConfig)
	case worker.ErrRebootMachine:
		logger.Infof("Caught reboot error")
		err = a.executeRebootOrShutdown(params.ShouldReboot)
	case worker.ErrShutdownMachine:
		logger.Infof("Caught shutdown error")
		err = a.executeRebootOrShutdown(params.ShouldShutdown)
	}
	err = cmdutil.AgentDone(logger, err)
	a.tomb.Kill(err)
	return err
}

func (a *MachineAgent) executeRebootOrShutdown(action params.RebootAction) error {
	agentCfg := a.CurrentConfig()
	// At this stage, all API connections would have been closed.
	// We need to reopen the API to clear the reboot flag after
	// scheduling the reboot. It may be cleaner to do this in the
	// reboot worker, before returning ErrRebootMachine.
	st, _, err := apicaller.OpenAPIState(a)
	if err != nil {
		logger.Infof("Reboot: Error connecting to state")
		return errors.Trace(err)
	}
	// Block until all units/containers are ready, then reboot/shutdown.
	finalize, err := reboot.NewRebootWaiter(st, agentCfg)
	if err != nil {
		return errors.Trace(err)
	}

	logger.Infof("Reboot: Executing reboot")
	err = finalize.ExecuteReboot(action)
	if err != nil {
		logger.Infof("Reboot: Error executing reboot: %v", err)
		return errors.Trace(err)
	}
	// On Windows, the shutdown command is asynchronous. We return
	// ErrRebootMachine so the agent will simply exit without error,
	// pending the reboot/shutdown.
	return worker.ErrRebootMachine
}

func (a *MachineAgent) ChangeConfig(mutate agent.ConfigMutator) error {
	err := a.AgentConfigWriter.ChangeConfig(mutate)
	a.configChangedVal.Set(struct{}{})
	if err != nil {
		return errors.Trace(err)
	}
	return nil
}

// PrepareRestore flags the agent to allow only the limited set of
// commands defined in
// "github.com/juju/juju/apiserver".allowedMethodsAboutToRestore.
// The most noteworthy of these is Backups.Restore, which ensures that
// we can make all the file movements required for restore while
// nobody else makes changes. It returns an error if the machine is
// already in this state.
func (a *MachineAgent) PrepareRestore() error {
	if a.restoreMode {
		return errors.Errorf("already in restore mode")
	}
	a.restoreMode = true
	return nil
}

// BeginRestore flags the agent to disallow all commands, since the
// restore should be running and any other changes could override
// what it does.
func (a *MachineAgent) BeginRestore() error {
	switch {
	case !a.restoreMode:
		return errors.Errorf("not in restore mode, cannot begin restoration")
	case a.restoring:
		return errors.Errorf("already restoring")
	}
	a.restoring = true
	return nil
}

// EndRestore flags the agent to allow all commands again. Being
// invoked at all means the restore process failed, since a successful
// restore restarts the agent.
func (a *MachineAgent) EndRestore() {
	a.restoreMode = false
	a.restoring = false
}

// newRestoreStateWatcherWorker returns a worker, or an error if there
// is a failure. The worker watches the state of the restoreInfo doc
// and puts the agent into the appropriate restore mode.
func (a *MachineAgent) newRestoreStateWatcherWorker(st *state.State) (worker.Worker, error) {
	rWorker := func(stopch <-chan struct{}) error {
		return a.restoreStateWatcher(st, stopch)
	}
	return worker.NewSimpleWorker(rWorker), nil
}

// restoreChanged is called whenever the restoreInfo doc changes,
// signaling a new step in the restore process.
func (a *MachineAgent) restoreChanged(st *state.State) error {
	rinfo, err := st.RestoreInfoSetter()
	if err != nil {
		return errors.Annotate(err, "cannot read restore state")
	}
	switch rinfo.Status() {
	case state.RestorePending:
		a.PrepareRestore()
	case state.RestoreInProgress:
		a.BeginRestore()
	case state.RestoreFailed:
		a.EndRestore()
	}
	return nil
}

// restoreStateWatcher watches for restoreInfo looking for changes in the restore process.
func (a *MachineAgent) restoreStateWatcher(st *state.State, stopch <-chan struct{}) error {
	restoreWatch := st.WatchRestoreInfoChanges()
	defer func() {
		restoreWatch.Kill()
		restoreWatch.Wait()
	}()

	for {
		select {
		case <-restoreWatch.Changes():
			if err := a.restoreChanged(st); err != nil {
				return err
			}
		case <-stopch:
			return nil
		}
	}
}

// newStateStarterWorker wraps stateStarter in a simple worker for use in
// a.runner.StartWorker.
func (a *MachineAgent) newStateStarterWorker() (worker.Worker, error) {
	return worker.NewSimpleWorker(a.stateStarter), nil
}

// stateStarter watches for changes to the agent configuration, and
// starts or stops the state worker as appropriate. We watch the agent
// configuration because the agent configuration has all the details
// that we need to start a state server, whether they have been cached
// or read from the state.
//
// It will stop working as soon as stopch is closed.
func (a *MachineAgent) stateStarter(stopch <-chan struct{}) error {
	confWatch := a.configChangedVal.Watch()
	defer confWatch.Close()
	watchCh := make(chan struct{})
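	// Forward each configuration-change notification onto watchCh
	// until the watcher is closed.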
	go func() {
		for confWatch.Next() {
			watchCh <- struct{}{}
		}
	}()
	for {
		select {
		case <-watchCh:
			agentConfig := a.CurrentConfig()

			// N.B. StartWorker and StopWorker are idempotent.
			_, ok := agentConfig.StateServingInfo()
			if ok {
				a.runner.StartWorker("state", func() (worker.Worker, error) {
					return a.StateWorker()
				})
			} else {
				a.runner.StopWorker("state")
			}
		case <-stopch:
			return nil
		}
	}
}

// APIWorker returns a Worker that connects to the API and starts any
// workers that need an API connection.
func (a *MachineAgent) APIWorker() (_ worker.Worker, err error) {
	st, entity, err := apicaller.OpenAPIState(a)
	if err != nil {
		return nil, err
	}
	reportOpenedAPI(st)

	defer func() {
		// TODO(fwereade): this is not properly tested. Old tests were evil
		// (dependent on injecting an error in a patched-out upgrader API
		// that shouldn't even be used at this level)... so I just deleted
		// them. Not a major worry: this whole method will become redundant
		// when we switch to the dependency engine (and specifically use
		// worker/apicaller to connect).
		if err != nil {
			if err := st.Close(); err != nil {
				logger.Errorf("while closing API: %v", err)
			}
		}
	}()

	agentConfig := a.CurrentConfig()
	for _, job := range entity.Jobs() {
		if job.NeedsState() {
			info, err := st.Agent().StateServingInfo()
			if err != nil {
				return nil, fmt.Errorf("cannot get state serving info: %v", err)
			}
			err = a.ChangeConfig(func(config agent.ConfigSetter) error {
				config.SetStateServingInfo(info)
				return nil
			})
			if err != nil {
				return nil, err
			}
			agentConfig = a.CurrentConfig()
			break
		}
	}

	runner := newConnRunner(st)

	// Run the agent upgrader and the upgrade-steps worker without waiting for
	// the upgrade steps to complete.
	runner.StartWorker("upgrader", a.agentUpgraderWorkerStarter(st.Upgrader(), agentConfig))
	runner.StartWorker("upgrade-steps", a.upgradeStepsWorkerStarter(st, entity.Jobs()))

	// All other workers must wait for the upgrade steps to complete before starting.
	a.startWorkerAfterUpgrade(runner, "api-post-upgrade", func() (worker.Worker, error) {
		return a.postUpgradeAPIWorker(st, agentConfig, entity)
	})

	return cmdutil.NewCloseWorker(logger, runner, st), nil // Note: a worker.Runner is itself a worker.Worker.
}

   693  
   694  func (a *MachineAgent) postUpgradeAPIWorker(
   695  	st api.Connection,
   696  	agentConfig agent.Config,
   697  	entity *apiagent.Entity,
   698  ) (worker.Worker, error) {
   699  
   700  	var isEnvironManager bool
   701  	for _, job := range entity.Jobs() {
   702  		if job == multiwatcher.JobManageEnviron {
   703  			isEnvironManager = true
   704  			break
   705  		}
   706  	}
   707  
   708  	runner := newConnRunner(st)
   709  
   710  	// TODO(fwereade): this is *still* a hideous layering violation, but at least
   711  	// it's confined to jujud rather than extending into the worker itself.
   712  	// Start this worker first to try and get proxy settings in place
   713  	// before we do anything else.
   714  	writeSystemFiles := shouldWriteProxyFiles(agentConfig)
   715  	runner.StartWorker("proxyupdater", func() (worker.Worker, error) {
   716  		return proxyupdater.New(st.Environment(), writeSystemFiles), nil
   717  	})
   718  
   719  	if isEnvironManager {
   720  		runner.StartWorker("resumer", func() (worker.Worker, error) {
   721  			// The action of resumer is so subtle that it is not tested,
   722  			// because we can't figure out how to do so without
   723  			// brutalising the transaction log.
   724  			return newResumer(st.Resumer()), nil
   725  		})
   726  	}
   727  
   728  	if feature.IsDbLogEnabled() {
   729  		runner.StartWorker("logsender", func() (worker.Worker, error) {
   730  			return logsender.New(a.bufferedLogs, gate.AlreadyUnlocked{}, a), nil
   731  		})
   732  	}
   733  
   734  	envConfig, err := st.Environment().EnvironConfig()
   735  	if err != nil {
   736  		return nil, fmt.Errorf("cannot read environment config: %v", err)
   737  	}
   738  	ignoreMachineAddresses, _ := envConfig.IgnoreMachineAddresses()
   739  	if ignoreMachineAddresses {
   740  		logger.Infof("machine addresses not used, only addresses from provider")
   741  	}
   742  	runner.StartWorker("machiner", func() (worker.Worker, error) {
   743  		accessor := machiner.APIMachineAccessor{st.Machiner()}
   744  		return newMachiner(accessor, agentConfig, ignoreMachineAddresses), nil
   745  	})
   746  	runner.StartWorker("reboot", func() (worker.Worker, error) {
   747  		reboot, err := st.Reboot()
   748  		if err != nil {
   749  			return nil, errors.Trace(err)
   750  		}
   751  		lock, err := cmdutil.HookExecutionLock(cmdutil.DataDir)
   752  		if err != nil {
   753  			return nil, errors.Trace(err)
   754  		}
   755  		return rebootworker.NewReboot(reboot, agentConfig, lock)
   756  	})
   757  	runner.StartWorker("apiaddressupdater", func() (worker.Worker, error) {
   758  		addressUpdater := agent.APIHostPortsSetter{a}
   759  		return apiaddressupdater.NewAPIAddressUpdater(st.Machiner(), addressUpdater), nil
   760  	})
   761  	runner.StartWorker("logger", func() (worker.Worker, error) {
   762  		return workerlogger.NewLogger(st.Logger(), agentConfig), nil
   763  	})
   764  
   765  	if !featureflag.Enabled(feature.DisableRsyslog) {
   766  		rsyslogMode := rsyslog.RsyslogModeForwarding
   767  		if isEnvironManager {
   768  			rsyslogMode = rsyslog.RsyslogModeAccumulate
   769  		}
   770  
   771  		runner.StartWorker("rsyslog", func() (worker.Worker, error) {
   772  			return cmdutil.NewRsyslogConfigWorker(st.Rsyslog(), agentConfig, rsyslogMode)
   773  		})
   774  	}
   775  
   776  	if !isEnvironManager {
   777  		runner.StartWorker("stateconverter", func() (worker.Worker, error) {
   778  			return worker.NewNotifyWorker(conv2state.New(st.Machiner(), a)), nil
   779  		})
   780  	}
   781  
   782  	runner.StartWorker("diskmanager", func() (worker.Worker, error) {
   783  		api, err := st.DiskManager()
   784  		if err != nil {
   785  			return nil, errors.Trace(err)
   786  		}
   787  		return newDiskManager(diskmanager.DefaultListBlockDevices, api), nil
   788  	})
   789  	runner.StartWorker("storageprovisioner-machine", func() (worker.Worker, error) {
   790  		scope := agentConfig.Tag()
   791  		api := st.StorageProvisioner(scope)
   792  		storageDir := filepath.Join(agentConfig.DataDir(), "storage")
   793  		return newStorageWorker(
   794  			scope, storageDir, api, api, api, api, api, api,
   795  			clock.WallClock,
   796  		), nil
   797  	})
   798  
   799  	if isEnvironManager {
   800  		// Start worker that stores missing published image metadata in state.
   801  		runner.StartWorker("imagemetadata", func() (worker.Worker, error) {
   802  			return newMetadataUpdater(st.MetadataUpdater()), nil
   803  		})
   804  	}
   805  
   806  	// Check if the network management is disabled.
   807  	disableNetworkManagement, _ := envConfig.DisableNetworkManagement()
   808  	if disableNetworkManagement {
   809  		logger.Infof("network management is disabled")
   810  	}
   811  
   812  	// Start networker depending on configuration and job.
   813  	intrusiveMode := false
   814  	for _, job := range entity.Jobs() {
   815  		if job == multiwatcher.JobManageNetworking {
   816  			intrusiveMode = true
   817  			break
   818  		}
   819  	}
   820  	intrusiveMode = intrusiveMode && !disableNetworkManagement
   821  	runner.StartWorker("networker", func() (worker.Worker, error) {
   822  		return newNetworker(st.Networker(), agentConfig, intrusiveMode, networker.DefaultConfigBaseDir)
   823  	})
   824  
   825  	// If not a local provider bootstrap machine, start the worker to
   826  	// manage SSH keys.
   827  	providerType := agentConfig.Value(agent.ProviderType)
   828  	if providerType != provider.Local || a.machineId != bootstrapMachineId {
   829  		runner.StartWorker("authenticationworker", func() (worker.Worker, error) {
   830  			return authenticationworker.NewWorker(st.KeyUpdater(), agentConfig), nil
   831  		})
   832  	}
   833  
   834  	// Perform the operations needed to set up hosting for containers.
   835  	if err := a.setupContainerSupport(runner, st, entity, agentConfig); err != nil {
   836  		cause := errors.Cause(err)
   837  		if params.IsCodeDead(cause) || cause == worker.ErrTerminateAgent {
   838  			return nil, worker.ErrTerminateAgent
   839  		}
   840  		return nil, fmt.Errorf("setting up container support: %v", err)
   841  	}
   842  	for _, job := range entity.Jobs() {
   843  		switch job {
   844  		case multiwatcher.JobHostUnits:
   845  			runner.StartWorker("deployer", func() (worker.Worker, error) {
   846  				apiDeployer := st.Deployer()
   847  				context := newDeployContext(apiDeployer, agentConfig)
   848  				return deployer.NewDeployer(apiDeployer, context), nil
   849  			})
   850  		case multiwatcher.JobManageEnviron:
   851  			runner.StartWorker("identity-file-writer", func() (worker.Worker, error) {
   852  				inner := func(<-chan struct{}) error {
   853  					agentConfig := a.CurrentConfig()
   854  					return agent.WriteSystemIdentityFile(agentConfig)
   855  				}
   856  				return worker.NewSimpleWorker(inner), nil
   857  			})
   858  			runner.StartWorker("toolsversionchecker", func() (worker.Worker, error) {
				// Checking 4 times a day seems a decent enough frequency.
				checkerParams := toolsversionchecker.VersionCheckerParams{
					CheckInterval: time.Hour * 6,
				}
				return toolsversionchecker.New(st.Environment(), &checkerParams), nil
			})

		case multiwatcher.JobManageStateDeprecated:
			// Legacy environments may set this, but we ignore it.
		default:
			// TODO(dimitern): Once all workers moved over to using
			// the API, report "unknown job type" here.
		}
	}

	return cmdutil.NewCloseWorker(logger, runner, st), nil // Note: a worker.Runner is itself a worker.Worker.
}

// Restart restarts the agent's service.
func (a *MachineAgent) Restart() error {
	name := a.CurrentConfig().Value(agent.AgentServiceName)
	return service.Restart(name)
}

func (a *MachineAgent) upgradeStepsWorkerStarter(
	st api.Connection,
	jobs []multiwatcher.MachineJob,
) func() (worker.Worker, error) {
	return func() (worker.Worker, error) {
		return a.upgradeWorkerContext.Worker(a, st, jobs), nil
	}
}

func (a *MachineAgent) agentUpgraderWorkerStarter(
	st *apiupgrader.State,
	agentConfig agent.Config,
) func() (worker.Worker, error) {
	return func() (worker.Worker, error) {
		return upgrader.NewAgentUpgrader(
			st,
			agentConfig,
			a.previousAgentVersion,
			a.upgradeWorkerContext.IsUpgradeRunning,
			a.initialAgentUpgradeCheckComplete,
		), nil
	}
}

// shouldWriteProxyFiles returns true, unless the supplied conf identifies the
// machine agent running directly on the host system in a local environment.
var shouldWriteProxyFiles = func(conf agent.Config) bool {
	if conf.Value(agent.ProviderType) != provider.Local {
		return true
	}
	return conf.Tag() != names.NewMachineTag(bootstrapMachineId)
}

// setupContainerSupport determines what containers can be run on this machine and
// initialises suitable infrastructure to support such containers.
func (a *MachineAgent) setupContainerSupport(runner worker.Runner, st api.Connection, entity *apiagent.Entity, agentConfig agent.Config) error {
	var supportedContainers []instance.ContainerType
	// LXC containers are only supported on bare metal and fully
	// virtualized Linux systems. Nested LXC containers and Windows
	// machines cannot run LXC containers.
	supportsLXC, err := lxc.IsLXCSupported()
	if err != nil {
		logger.Warningf("no lxc containers possible: %v", err)
	}
	if err == nil && supportsLXC {
		supportedContainers = append(supportedContainers, instance.LXC)
	}

	supportsKvm, err := kvm.IsKVMSupported()
	if err != nil {
		logger.Warningf("determining kvm support: %v\nno kvm containers possible", err)
	}
	if err == nil && supportsKvm {
		supportedContainers = append(supportedContainers, instance.KVM)
	}
	return a.updateSupportedContainers(runner, st, entity.Tag(), supportedContainers, agentConfig)
}

// updateSupportedContainers records in state that a machine can run the specified containers.
// It starts a watcher and when a container of a given type is first added to the machine,
// the watcher is killed, the machine is set up to be able to start containers of the given type,
// and a suitable provisioner is started.
func (a *MachineAgent) updateSupportedContainers(
	runner worker.Runner,
	st api.Connection,
	machineTag string,
	containers []instance.ContainerType,
	agentConfig agent.Config,
) error {
	pr := st.Provisioner()
	tag, err := names.ParseMachineTag(machineTag)
	if err != nil {
		return err
	}
	machine, err := pr.Machine(tag)
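	// A missing or Dead machine means this agent should terminate
	// (note that && binds tighter than ||, so the Life check only
	// applies when err == nil).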
	if errors.IsNotFound(err) || err == nil && machine.Life() == params.Dead {
		return worker.ErrTerminateAgent
	}
	if err != nil {
		return errors.Annotatef(err, "cannot load machine %s from state", tag)
	}
	if len(containers) == 0 {
		if err := machine.SupportsNoContainers(); err != nil {
			return errors.Annotatef(err, "clearing supported containers for %s", tag)
		}
		return nil
	}
	if err := machine.SetSupportedContainers(containers...); err != nil {
		return errors.Annotatef(err, "setting supported containers for %s", tag)
	}
	initLock, err := cmdutil.HookExecutionLock(agentConfig.DataDir())
	if err != nil {
		return err
	}
	// Start the watcher to fire when a container is first requested on the machine.
	envUUID, err := st.EnvironTag()
	if err != nil {
		return err
	}
	watcherName := fmt.Sprintf("%s-container-watcher", machine.Id())
	// There may not be a CA certificate private key available, and without
	// it we can't ensure that other Juju nodes can connect securely, so only
	// use an image URL getter if there's a private key.
	var imageURLGetter container.ImageURLGetter
	if agentConfig.Value(agent.AllowsSecureConnection) == "true" {
		imageURLGetter = container.NewImageURLGetter(st.Addr(), envUUID.Id(), []byte(agentConfig.CACert()))
	}
	params := provisioner.ContainerSetupParams{
		Runner:              runner,
		WorkerName:          watcherName,
		SupportedContainers: containers,
		ImageURLGetter:      imageURLGetter,
		Machine:             machine,
		Provisioner:         pr,
		Config:              agentConfig,
		InitLock:            initLock,
	}
	handler := provisioner.NewContainerSetupHandler(params)
	a.startWorkerAfterUpgrade(runner, watcherName, func() (worker.Worker, error) {
		return worker.NewStringsWorker(handler), nil
	})
	return nil
}

// StateWorker returns a worker running all the workers that require
// a *state.State connection.
func (a *MachineAgent) StateWorker() (worker.Worker, error) {
	agentConfig := a.CurrentConfig()

	// Start MongoDB server and dial.
	if err := a.ensureMongoServer(agentConfig); err != nil {
		return nil, err
	}
	st, m, err := openState(agentConfig, stateWorkerDialOpts)
	if err != nil {
		return nil, err
	}
	reportOpenedState(st)

	stor := statestorage.NewStorage(st.EnvironUUID(), st.MongoSession())
	registerSimplestreamsDataSource(stor)

	runner := newConnRunner(st)
	singularRunner, err := newSingularStateRunner(runner, st, m)
	if err != nil {
		return nil, errors.Trace(err)
	}

	// We take advantage of special knowledge here: we will only ever
	// want the storage provider on one machine, and that is the
	// "bootstrap" node.
	providerType := agentConfig.Value(agent.ProviderType)
	if (providerType == provider.Local || provider.IsManual(providerType)) && m.Id() == bootstrapMachineId {
		a.startWorkerAfterUpgrade(runner, "local-storage", func() (worker.Worker, error) {
			// TODO(axw) 2013-09-24 bug #1229507
			// Make another job to enable storage.
			// There's nothing special about this.
			return localstorage.NewWorker(agentConfig), nil
		})
	}
	for _, job := range m.Jobs() {
		switch job {
		case state.JobHostUnits:
			// Implemented in APIWorker.
		case state.JobManageEnviron:
			useMultipleCPUs()
			a.startWorkerAfterUpgrade(runner, "env worker manager", func() (worker.Worker, error) {
				return envworkermanager.NewEnvWorkerManager(st, a.startEnvWorkers), nil
			})
			a.startWorkerAfterUpgrade(runner, "peergrouper", func() (worker.Worker, error) {
				return peergrouperNew(st)
			})
			a.startWorkerAfterUpgrade(runner, "restore", func() (worker.Worker, error) {
				return a.newRestoreStateWatcherWorker(st)
			})

			// certChangedChan is shared by multiple workers; it's up
			// to the agent to close it rather than any one of the
			// workers.
			//
			// TODO(ericsnow) For now we simply do not close the channel.
			certChangedChan := make(chan params.StateServingInfo, 1)
			runner.StartWorker("apiserver", a.apiserverWorkerStarter(st, certChangedChan))
			var stateServingSetter certupdater.StateServingInfoSetter = func(info params.StateServingInfo, done <-chan struct{}) error {
				return a.ChangeConfig(func(config agent.ConfigSetter) error {
					config.SetStateServingInfo(info)
					logger.Infof("update apiserver worker with new certificate")
					select {
					case certChangedChan <- info:
						return nil
					case <-done:
						return nil
					}
				})
			}
			a.startWorkerAfterUpgrade(runner, "certupdater", func() (worker.Worker, error) {
				return newCertificateUpdater(m, agentConfig, st, st, stateServingSetter), nil
			})

			if feature.IsDbLogEnabled() {
				a.startWorkerAfterUpgrade(singularRunner, "dblogpruner", func() (worker.Worker, error) {
					return dblogpruner.New(st, dblogpruner.NewLogPruneParams()), nil
				})
			}
			a.startWorkerAfterUpgrade(singularRunner, "statushistorypruner", func() (worker.Worker, error) {
				return statushistorypruner.New(st, statushistorypruner.NewHistoryPrunerParams()), nil
			})

			a.startWorkerAfterUpgrade(singularRunner, "txnpruner", func() (worker.Worker, error) {
				return txnpruner.New(st, time.Hour*2), nil
			})

		case state.JobManageStateDeprecated:
			// Legacy environments may set this, but we ignore it.
		default:
			logger.Warningf("ignoring unknown job %q", job)
		}
	}
	return cmdutil.NewCloseWorker(logger, runner, stateWorkerCloser{st}), nil
}

type stateWorkerCloser struct {
	stateCloser io.Closer
}

func (s stateWorkerCloser) Close() error {
	// This state-dependent data source will be useless once state is closed -
	// un-register it before closing state.
	unregisterSimplestreamsDataSource()
	return s.stateCloser.Close()
}

// startEnvWorkers starts state server workers that need to run per
// environment.
func (a *MachineAgent) startEnvWorkers(
	ssSt envworkermanager.InitialState,
	st *state.State,
) (_ worker.Worker, err error) {
	envUUID := st.EnvironUUID()
	defer errors.DeferredAnnotatef(&err, "failed to start workers for env %s", envUUID)
	logger.Infof("starting workers for env %s", envUUID)

	// Establish API connection for this environment.
	agentConfig := a.CurrentConfig()
	apiInfo := agentConfig.APIInfo()
	apiInfo.EnvironTag = st.EnvironTag()
	apiSt, err := apicaller.OpenAPIStateUsingInfo(apiInfo, agentConfig.OldPassword())
	if err != nil {
		return nil, errors.Trace(err)
	}

	// Create a runner for workers specific to this
	// environment. Either the State or API connection failing will be
	// considered fatal, killing the runner and all its workers.
	runner := newConnRunner(st, apiSt)
	defer func() {
		if err != nil && runner != nil {
			runner.Kill()
			runner.Wait()
		}
	}()
	// Close the API connection when the runner for this environment dies.
	go func() {
		runner.Wait()
		err := apiSt.Close()
		if err != nil {
			logger.Errorf("failed to close API connection for env %s: %v", envUUID, err)
		}
	}()

	// Create a singular runner for this environment.
	machine, err := ssSt.Machine(a.machineId)
	if err != nil {
		return nil, errors.Trace(err)
	}
	singularRunner, err := newSingularStateRunner(runner, ssSt, machine)
	if err != nil {
		return nil, errors.Trace(err)
	}
	defer func() {
		if err != nil && singularRunner != nil {
			singularRunner.Kill()
			singularRunner.Wait()
		}
	}()

	// Start workers that depend on a *state.State.
	// TODO(fwereade): 2015-04-21 THIS SHALL NOT PASS
	// Seriously, these should all be using the API.
	singularRunner.StartWorker("minunitsworker", func() (worker.Worker, error) {
		return minunitsworker.NewMinUnitsWorker(st), nil
	})

	// Start workers that use an API connection.
	singularRunner.StartWorker("environ-provisioner", func() (worker.Worker, error) {
		return provisioner.NewEnvironProvisioner(apiSt.Provisioner(), agentConfig), nil
	})
	singularRunner.StartWorker("environ-storageprovisioner", func() (worker.Worker, error) {
		scope := st.EnvironTag()
		api := apiSt.StorageProvisioner(scope)
		return newStorageWorker(
			scope, "", api, api, api, api, api, api,
			clock.WallClock,
		), nil
	})
	singularRunner.StartWorker("charm-revision-updater", func() (worker.Worker, error) {
		return charmrevisionworker.NewRevisionUpdateWorker(apiSt.CharmRevisionUpdater()), nil
	})
	runner.StartWorker("metricmanagerworker", func() (worker.Worker, error) {
		return metricworker.NewMetricsManager(getMetricAPI(apiSt))
	})
	singularRunner.StartWorker("instancepoller", func() (worker.Worker, error) {
		return newInstancePoller(apiSt.InstancePoller()), nil
	})
	singularRunner.StartWorker("cleaner", func() (worker.Worker, error) {
		return newCleaner(apiSt.Cleaner()), nil
	})
	singularRunner.StartWorker("addresserworker", func() (worker.Worker, error) {
		return newAddresser(apiSt.Addresser())
	})

	// TODO(axw) 2013-09-24 bug #1229506
	// Make another job to enable the firewaller. Not all
	// environments are capable of managing ports
	// centrally.
	fwMode, err := getFirewallMode(apiSt)
	if err != nil {
		return nil, errors.Annotate(err, "cannot get firewall mode")
	}
	if fwMode != config.FwNone {
		singularRunner.StartWorker("firewaller", func() (worker.Worker, error) {
			return newFirewaller(apiSt.Firewaller())
		})
	} else {
		logger.Debugf("not starting firewaller worker - firewall-mode is %q", fwMode)
	}

	return runner, nil
}

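// getFirewallMode is defined as a variable to allow the tests to
// intercept the call.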
var getFirewallMode = _getFirewallMode

func _getFirewallMode(apiSt api.Connection) (string, error) {
	envConfig, err := apiSt.Environment().EnvironConfig()
	if err != nil {
		return "", errors.Annotate(err, "cannot read environment config")
	}
	return envConfig.FirewallMode(), nil
}

// stateWorkerDialOpts is a mongo.DialOpts suitable
// for use by StateWorker to dial mongo.
//
// This must be overridden in tests, as it assumes
// journaling is enabled.
var stateWorkerDialOpts mongo.DialOpts

func (a *MachineAgent) apiserverWorkerStarter(st *state.State, certChanged chan params.StateServingInfo) func() (worker.Worker, error) {
	return func() (worker.Worker, error) { return a.newApiserverWorker(st, certChanged) }
}

func (a *MachineAgent) newApiserverWorker(st *state.State, certChanged chan params.StateServingInfo) (worker.Worker, error) {
	agentConfig := a.CurrentConfig()
	// If the configuration does not have the required information,
	// it is currently not a recoverable error, so we kill the whole
	// agent, potentially enabling human intervention to fix
	// the agent's configuration file.
	info, ok := agentConfig.StateServingInfo()
	if !ok {
		return nil, &cmdutil.FatalError{"StateServingInfo not available and we need it"}
	}
	cert := []byte(info.Cert)
	key := []byte(info.PrivateKey)

	if len(cert) == 0 || len(key) == 0 {
		return nil, &cmdutil.FatalError{"configuration does not have state server cert/key"}
	}
	tag := agentConfig.Tag()
	dataDir := agentConfig.DataDir()
	logDir := agentConfig.LogDir()

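	// An empty host in the endpoint means the listener binds to all
	// interfaces on the API port.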
	endpoint := net.JoinHostPort("", strconv.Itoa(info.APIPort))
	listener, err := net.Listen("tcp", endpoint)
	if err != nil {
		return nil, err
	}
	return apiserver.NewServer(st, listener, apiserver.ServerConfig{
		Cert:        cert,
		Key:         key,
		Tag:         tag,
		DataDir:     dataDir,
		LogDir:      logDir,
		Validator:   a.limitLogins,
		CertChanged: certChanged,
	})
}

// limitLogins is called by the API server for each login attempt.
// It returns an error if an upgrade or restore is running.
func (a *MachineAgent) limitLogins(req params.LoginRequest) error {
	if err := a.limitLoginsDuringRestore(req); err != nil {
		return err
	}
	return a.limitLoginsDuringUpgrade(req)
}

// limitLoginsDuringRestore only allows logins for restore-related
// purposes while the different steps of restore are running.
  1289  func (a *MachineAgent) limitLoginsDuringRestore(req params.LoginRequest) error {
  1290  	var err error
  1291  	switch {
  1292  	case a.IsRestoreRunning():
  1293  		err = apiserver.RestoreInProgressError
  1294  	case a.IsRestorePreparing():
  1295  		err = apiserver.AboutToRestoreError
  1296  	}
  1297  	if err != nil {
  1298  		authTag, parseErr := names.ParseTag(req.AuthTag)
  1299  		if parseErr != nil {
  1300  			return errors.Annotate(err, "could not parse auth tag")
  1301  		}
  1302  		switch authTag := authTag.(type) {
  1303  		case names.UserTag:
  1304  			// use a restricted API mode
  1305  			return err
  1306  		case names.MachineTag:
  1307  			if authTag == a.Tag() {
  1308  				// allow logins from the local machine
  1309  				return nil
  1310  			}
  1311  		}
  1312  		return errors.Errorf("login for %q blocked because restore is in progress", authTag)
  1313  	}
  1314  	return nil
  1315  }
  1316  
  1317  // limitLoginsDuringUpgrade is called by the API server for each login
  1318  // attempt. It returns an error if upgrades are in progress unless the
  1319  // login is for a user (i.e. a client) or the local machine.
  1320  func (a *MachineAgent) limitLoginsDuringUpgrade(req params.LoginRequest) error {
  1321  	if a.upgradeWorkerContext.IsUpgradeRunning() || a.isAgentUpgradePending() {
  1322  		authTag, err := names.ParseTag(req.AuthTag)
  1323  		if err != nil {
  1324  			return errors.Annotate(err, "could not parse auth tag")
  1325  		}
  1326  		switch authTag := authTag.(type) {
  1327  		case names.UserTag:
  1328  			// use a restricted API mode
  1329  			return apiserver.UpgradeInProgressError
  1330  		case names.MachineTag:
  1331  			if authTag == a.Tag() {
  1332  				// allow logins from the local machine
  1333  				return nil
  1334  			}
  1335  		}
  1336  		return errors.Errorf("login for %q blocked because %s", authTag, apiserver.UpgradeInProgressError.Error())
  1337  	} else {
  1338  		return nil // allow all logins
  1339  	}
  1340  }
  1341  
  1342  var stateWorkerServingConfigErr = errors.New("state worker started with no state serving info")
  1343  
  1344  // ensureMongoServer ensures that mongo is installed and running,
  1345  // and ready for opening a state connection.
  1346  func (a *MachineAgent) ensureMongoServer(agentConfig agent.Config) (err error) {
  1347  	a.mongoInitMutex.Lock()
  1348  	defer a.mongoInitMutex.Unlock()
  1349  	if a.mongoInitialized {
  1350  		logger.Debugf("mongo is already initialized")
  1351  		return nil
  1352  	}
  1353  	defer func() {
  1354  		if err == nil {
  1355  			a.mongoInitialized = true
  1356  		}
  1357  	}()
  1358  
  1359  	// Many of the steps here, such as adding the state server to the
  1360  	// admin DB and initiating the replicaset, are once-only actions,
  1361  	// required when upgrading from a pre-HA-capable
  1362  	// environment. These calls won't do anything if the thing they
  1363  	// need to set up has already been done.
  1364  	var needReplicasetInit = false
  1365  	var machineAddrs []network.Address
  1366  
  1367  	mongoInstalled, err := mongo.IsServiceInstalled(agentConfig.Value(agent.Namespace))
  1368  	if err != nil {
  1369  		return errors.Annotate(err, "error while checking if mongodb service is installed")
  1370  	}
  1371  
  1372  	if mongoInstalled {
  1373  		logger.Debugf("mongodb service is installed")
  1374  
  1375  		if _, err := a.ensureMongoAdminUser(agentConfig); err != nil {
  1376  			return errors.Trace(err)
  1377  		}
  1378  
  1379  		if err := a.ensureMongoSharedSecret(agentConfig); err != nil {
  1380  			return errors.Trace(err)
  1381  		}
  1382  		agentConfig = a.CurrentConfig() // ensureMongoSharedSecret may have updated the config
  1383  
  1384  		mongoInfo, ok := agentConfig.MongoInfo()
  1385  		if !ok {
  1386  			return errors.New("unable to retrieve mongo info to check replicaset")
  1387  		}
  1388  
  1389  		needReplicasetInit, err = isReplicasetInitNeeded(mongoInfo)
  1390  		if err != nil {
  1391  			return errors.Annotate(err, "error while checking replicaset")
  1392  		}
  1393  
  1394  		// If the replicaset is to be initialised the machine addresses
  1395  		// need to be retrieved *before* MongoDB is restarted with the
  1396  		// --replset option (in EnsureMongoServer). Once MongoDB is
  1397  		// started with --replset it won't respond to queries until the
  1398  		// replicaset is initiated.
  1399  		if needReplicasetInit {
  1400  			logger.Infof("replicaset not yet configured")
  1401  			machineAddrs, err = getMachineAddresses(agentConfig)
  1402  			if err != nil {
  1403  				return errors.Trace(err)
  1404  			}
  1405  		}
  1406  	}
  1407  
  1408  	// EnsureMongoServer installs/upgrades the init config as necessary.
  1409  	ensureServerParams, err := cmdutil.NewEnsureServerParams(agentConfig)
  1410  	if err != nil {
  1411  		return err
  1412  	}
  1413  	if err := cmdutil.EnsureMongoServer(ensureServerParams); err != nil {
  1414  		return err
  1415  	}
  1416  
  1417  	// Initiate the replicaset if required.
  1418  	if needReplicasetInit {
  1419  		servingInfo, ok := agentConfig.StateServingInfo()
  1420  		if !ok {
  1421  			return stateWorkerServingConfigErr
  1422  		}
  1423  		mongoInfo, ok := agentConfig.MongoInfo()
  1424  		if !ok {
  1425  			return errors.New("unable to retrieve mongo info to initiate replicaset")
  1426  		}
  1427  		if err := initiateReplicaSet(mongoInfo, servingInfo.StatePort, machineAddrs); err != nil {
  1428  			return err
  1429  		}
  1430  	}
  1431  
  1432  	return nil
  1433  }
  1434  
  1435  // ensureMongoAdminUser ensures that the machine's mongo user is in
  1436  // the admin DB.
  1437  func (a *MachineAgent) ensureMongoAdminUser(agentConfig agent.Config) (added bool, err error) {
  1438  	mongoInfo, ok1 := agentConfig.MongoInfo()
  1439  	servingInfo, ok2 := agentConfig.StateServingInfo()
  1440  	if !ok1 || !ok2 {
  1441  		return false, stateWorkerServingConfigErr
  1442  	}
  1443  	dialInfo, err := mongo.DialInfo(mongoInfo.Info, mongo.DefaultDialOpts())
  1444  	if err != nil {
  1445  		return false, err
  1446  	}
  1447  	if len(dialInfo.Addrs) > 1 {
  1448  		logger.Infof("more than one state server; admin user must already exist")
  1449  		return false, nil
  1450  	}
  1451  	return ensureMongoAdminUser(mongo.EnsureAdminUserParams{
  1452  		DialInfo:  dialInfo,
  1453  		Namespace: agentConfig.Value(agent.Namespace),
  1454  		DataDir:   agentConfig.DataDir(),
  1455  		Port:      servingInfo.StatePort,
  1456  		User:      mongoInfo.Tag.String(),
  1457  		Password:  mongoInfo.Password,
  1458  	})
  1459  }
  1460  
  1461  // ensureMongoSharedSecret generates a MongoDB shared secret if
  1462  // required, updating the agent's config and state.
  1463  func (a *MachineAgent) ensureMongoSharedSecret(agentConfig agent.Config) error {
  1464  	servingInfo, ok := agentConfig.StateServingInfo()
  1465  	if !ok {
  1466  		return stateWorkerServingConfigErr
  1467  	}
  1468  
  1469  	if servingInfo.SharedSecret != "" {
  1470  		return nil // Already done
  1471  	}
  1472  
  1473  	logger.Infof("state serving info has no shared secret - generating")
  1474  
  1475  	var err error
  1476  	servingInfo.SharedSecret, err = mongo.GenerateSharedSecret()
  1477  	if err != nil {
  1478  		return err
  1479  	}
  1480  	logger.Debugf("updating state serving info in agent config")
  1481  	if err = a.ChangeConfig(func(config agent.ConfigSetter) error {
  1482  		config.SetStateServingInfo(servingInfo)
  1483  		return nil
  1484  	}); err != nil {
  1485  		return err
  1486  	}
  1487  	agentConfig = a.CurrentConfig()
  1488  
  1489  	logger.Debugf("updating state serving info in state")
  1490  
  1491  	// Note: we set Direct=true in the mongo options because it's
  1492  	// possible that we've previously upgraded the mongo server's
  1493  	// configuration to form a replicaset, but failed to initiate it.
  1494  	dialOpts := mongo.DefaultDialOpts()
  1495  	dialOpts.Direct = true
  1496  	st, _, err := openState(agentConfig, dialOpts)
  1497  	if err != nil {
  1498  		return err
  1499  	}
  1500  	defer st.Close()
  1501  
  1502  	ssi := cmdutil.ParamsStateServingInfoToStateStateServingInfo(servingInfo)
  1503  	if err := st.SetStateServingInfo(ssi); err != nil {
  1504  		return errors.Annotate(err, "cannot set state serving info")
  1505  	}
  1506  
  1507  	logger.Infof("shared secret updated in state serving info")
  1508  	return nil
  1509  }
  1510  
  1511  // isReplicasetInitNeeded returns true if the replicaset needs to be
  1512  // initiated: that is, if its config cannot be read or has no members.
  1513  func isReplicasetInitNeeded(mongoInfo *mongo.MongoInfo) (bool, error) {
  1514  	dialInfo, err := mongo.DialInfo(mongoInfo.Info, mongo.DefaultDialOpts())
  1515  	if err != nil {
  1516  		return false, errors.Annotate(err, "cannot generate dial info to check replicaset")
  1517  	}
  1518  	dialInfo.Username = mongoInfo.Tag.String()
  1519  	dialInfo.Password = mongoInfo.Password
  1520  
  1521  	session, err := mgo.DialWithInfo(dialInfo)
  1522  	if err != nil {
  1523  		return false, errors.Annotate(err, "cannot dial mongo to check replicaset")
  1524  	}
  1525  	defer session.Close()
  1526  
  1527  	cfg, err := replicaset.CurrentConfig(session)
  1528  	if err != nil {
  1529  		logger.Debugf("couldn't retrieve replicaset config (not fatal): %v", err)
  1530  		return true, nil
  1531  	}
  1532  	numMembers := len(cfg.Members)
  1533  	logger.Debugf("replicaset member count: %d", numMembers)
  1534  	return numMembers < 1, nil
  1535  }
  1536  
  1537  // getMachineAddresses connects to state to determine the machine's
  1538  // network addresses.
  1539  func getMachineAddresses(agentConfig agent.Config) ([]network.Address, error) {
  1540  	logger.Debugf("opening state to get machine addresses")
  1541  	dialOpts := mongo.DefaultDialOpts()
  1542  	dialOpts.Direct = true
  1543  	st, m, err := openState(agentConfig, dialOpts)
  1544  	if err != nil {
  1545  		return nil, errors.Annotate(err, "failed to open state to retrieve machine addresses")
  1546  	}
  1547  	defer st.Close()
  1548  	return m.Addresses(), nil
  1549  }
  1550  
  1551  // initiateReplicaSet connects to MongoDB and sets up the replicaset.
  1552  func initiateReplicaSet(mongoInfo *mongo.MongoInfo, statePort int, machineAddrs []network.Address) error {
  1553  	peerAddr := mongo.SelectPeerAddress(machineAddrs)
  1554  	if peerAddr == "" {
  1555  		return errors.Errorf("no appropriate peer address found in %v", machineAddrs)
  1556  	}
  1557  
  1558  	dialInfo, err := mongo.DialInfo(mongoInfo.Info, mongo.DefaultDialOpts())
  1559  	if err != nil {
  1560  		return errors.Annotate(err, "cannot generate dial info to initiate replicaset")
  1561  	}
  1562  
  1563  	if err := maybeInitiateMongoServer(peergrouper.InitiateMongoParams{
  1564  		DialInfo:       dialInfo,
  1565  		MemberHostPort: net.JoinHostPort(peerAddr, fmt.Sprint(statePort)),
  1566  		User:           mongoInfo.Tag.String(), // TODO(dfc) InitiateMongoParams should take a Tag
  1567  		Password:       mongoInfo.Password,
  1568  	}); err != nil && err != peergrouper.ErrReplicaSetAlreadyInitiated {
  1569  		return err
  1570  	}
  1571  	return nil
  1572  }
  1573  
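        // openState opens a direct connection to state using the agent's mongo
        // info, returning the State and this agent's machine entity. The caller
        // must close the returned State; on error, any opened State is closed
        // before returning.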
  1574  func openState(agentConfig agent.Config, dialOpts mongo.DialOpts) (_ *state.State, _ *state.Machine, err error) {
  1575  	info, ok := agentConfig.MongoInfo()
  1576  	if !ok {
  1577  		return nil, nil, errors.New("no state info available")
  1578  	}
  1579  	st, err := state.Open(agentConfig.Environment(), info, dialOpts, environs.NewStatePolicy())
  1580  	if err != nil {
  1581  		return nil, nil, err
  1582  	}
  1583  	defer func() {
  1584  		if err != nil {
  1585  			st.Close()
  1586  		}
  1587  	}()
  1588  	m0, err := st.FindEntity(agentConfig.Tag())
  1589  	if err != nil {
  1590  		if errors.IsNotFound(err) {
  1591  			err = worker.ErrTerminateAgent
  1592  		}
  1593  		return nil, nil, err
  1594  	}
  1595  	m := m0.(*state.Machine)
  1596  	if m.Life() == state.Dead {
  1597  		return nil, nil, worker.ErrTerminateAgent
  1598  	}
  1599  	// Check that the machine nonce as provisioned matches the agent config value.
  1600  	if !m.CheckProvisioned(agentConfig.Nonce()) {
  1601  		// The agent is running on a different machine to the one it
  1602  		// should be according to state. It must stop immediately.
  1603  		logger.Errorf("running machine %v agent on inappropriate instance", m)
  1604  		return nil, nil, worker.ErrTerminateAgent
  1605  	}
  1606  	return st, m, nil
  1607  }
  1608  
  1609  // startWorkerAfterUpgrade starts a worker to run the specified child worker
  1610  // but only after waiting for upgrades to complete.
  1611  func (a *MachineAgent) startWorkerAfterUpgrade(runner worker.Runner, name string, start func() (worker.Worker, error)) {
  1612  	runner.StartWorker(name, func() (worker.Worker, error) {
  1613  		return a.upgradeWaiterWorker(name, start), nil
  1614  	})
  1615  }
  1616  
  1617  // upgradeWaiterWorker returns a worker that runs start's worker once upgrades complete.
  1618  func (a *MachineAgent) upgradeWaiterWorker(name string, start func() (worker.Worker, error)) worker.Worker {
  1619  	return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
  1620  		// Wait for the agent upgrade and upgrade steps to complete (or for us to be stopped).
  1621  		for _, ch := range []chan struct{}{
  1622  			a.upgradeWorkerContext.UpgradeComplete,
  1623  			a.initialAgentUpgradeCheckComplete,
  1624  		} {
  1625  			select {
  1626  			case <-stop:
  1627  				return nil
  1628  			case <-ch:
  1629  			}
  1630  		}
  1631  		logger.Debugf("upgrades done, starting worker %q", name)
  1632  
  1633  		// Upgrades are done, start the worker.
  1634  		w, err := start()
  1635  		if err != nil {
  1636  			return err
  1637  		}
  1638  		// Wait for worker to finish or for us to be stopped.
  1639  		waitCh := make(chan error)
  1640  		go func() {
  1641  			waitCh <- w.Wait()
  1642  		}()
  1643  		select {
  1644  		case err := <-waitCh:
  1645  			logger.Debugf("worker %q exited with %v", name, err)
  1646  			return err
  1647  		case <-stop:
  1648  			logger.Debugf("agent stopping; killing worker %q", name)
  1649  			w.Kill()
  1650  		}
  1651  		return <-waitCh // Ensure worker has stopped before returning.
  1652  	})
  1653  }
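
        // Illustrative only: a hypothetical registration that defers a worker
        // until upgrades finish. The worker name "example" and the
        // newExampleWorker constructor are placeholders, not part of this file:
        //
        //	a.startWorkerAfterUpgrade(runner, "example", func() (worker.Worker, error) {
        //		return newExampleWorker(), nil // hypothetical constructor
        //	})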
  1654  
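        // setMachineStatus sets the status of this agent's machine using the
        // machiner facade on the given API connection.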
  1655  func (a *MachineAgent) setMachineStatus(apiState api.Connection, status params.Status, info string) error {
  1656  	tag := a.Tag().(names.MachineTag)
  1657  	machine, err := apiState.Machiner().Machine(tag)
  1658  	if err != nil {
  1659  		return errors.Trace(err)
  1660  	}
  1661  	if err := machine.SetStatus(status, info, nil); err != nil {
  1662  		return errors.Trace(err)
  1663  	}
  1664  	return nil
  1665  }
  1666  
  1667  // WorkersStarted returns a channel that's closed once all top level workers
  1668  // have been started. This is provided for testing purposes.
  1669  func (a *MachineAgent) WorkersStarted() <-chan struct{} {
  1670  	return a.workersStarted
  1671  }
  1672  
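        // Tag returns the tag of the machine this agent runs on behalf of.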
  1673  func (a *MachineAgent) Tag() names.Tag {
  1674  	return names.NewMachineTag(a.machineId)
  1675  }
  1676  
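        // createJujuRun (re)creates the juju-run symlink so that it points at
        // the jujud binary in this agent's tools directory.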
  1677  func (a *MachineAgent) createJujuRun(dataDir string) error {
  1678  	// TODO do not remove the symlink if it already points
  1679  	// to the right place.
  1680  	if err := os.Remove(JujuRun); err != nil && !os.IsNotExist(err) {
  1681  		return err
  1682  	}
  1683  	jujud := filepath.Join(dataDir, "tools", a.Tag().String(), jujunames.Jujud)
  1684  	return symlink.New(jujud, JujuRun)
  1685  }
  1686  
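        // uninstallAgent removes everything the agent installed on the machine:
        // its init service, the juju-run symlink, loop devices leaked inside
        // LXC, the mongo service and the agent's data directory. Failures are
        // collected so that removal continues past individual errors.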
  1687  func (a *MachineAgent) uninstallAgent(agentConfig agent.Config) error {
  1688  	var errs []error
  1689  	agentServiceName := agentConfig.Value(agent.AgentServiceName)
  1690  	if agentServiceName == "" {
  1691  		// For backwards compatibility, handle lack of AgentServiceName.
  1692  		agentServiceName = os.Getenv("UPSTART_JOB")
  1693  	}
  1694  	if agentServiceName != "" {
  1695  		svc, err := service.DiscoverService(agentServiceName, common.Conf{})
  1696  		if err != nil {
  1697  			errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err))
  1698  		} else if err := svc.Remove(); err != nil {
  1699  			errs = append(errs, fmt.Errorf("cannot remove service %q: %v", agentServiceName, err))
  1700  		}
  1701  	}
  1702  
  1703  	// Remove the juju-run symlink.
  1704  	if err := os.Remove(JujuRun); err != nil && !os.IsNotExist(err) {
  1705  		errs = append(errs, err)
  1706  	}
  1707  
  1708  	insideLXC, err := lxcutils.RunningInsideLXC()
  1709  	if err != nil {
  1710  		errs = append(errs, err)
  1711  	} else if insideLXC {
  1712  		// We're running inside LXC, so loop devices may leak. Detach
  1713  		// any loop devices that are backed by files on this machine.
  1714  		//
  1715  		// It is necessary to do this here as well as in container/lxc,
  1716  		// as container/lxc needs to check in the container's rootfs
  1717  		// to see if the loop device is attached to the container; that
  1718  		// will fail if the data-dir is removed first.
  1719  		if err := a.loopDeviceManager.DetachLoopDevices("/", agentConfig.DataDir()); err != nil {
  1720  			errs = append(errs, err)
  1721  		}
  1722  	}
  1723  
  1724  	namespace := agentConfig.Value(agent.Namespace)
  1725  	if err := mongo.RemoveService(namespace); err != nil {
  1726  		errs = append(errs, fmt.Errorf("cannot stop/remove mongo service with namespace %q: %v", namespace, err))
  1727  	}
  1728  	if err := os.RemoveAll(agentConfig.DataDir()); err != nil {
  1729  		errs = append(errs, err)
  1730  	}
  1731  	if len(errs) == 0 {
  1732  		return nil
  1733  	}
  1734  	return fmt.Errorf("uninstall failed: %v", errs)
  1735  }
  1736  
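        // newConnRunner returns a worker runner that treats the failure of any
        // of the given connections as fatal.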
  1737  func newConnRunner(conns ...cmdutil.Pinger) worker.Runner {
  1738  	return worker.NewRunner(cmdutil.ConnectionIsFatal(logger, conns...), cmdutil.MoreImportant)
  1739  }
  1740  
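        // MongoSessioner is implemented by values that can supply a mongo
        // session, notably *state.State.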
  1741  type MongoSessioner interface {
  1742  	MongoSession() *mgo.Session
  1743  }
  1744  
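        // newSingularStateRunner wraps runner so that workers started through
        // it run only while this machine is the replicaset master.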
  1745  func newSingularStateRunner(runner worker.Runner, st MongoSessioner, m *state.Machine) (worker.Runner, error) {
  1746  	singularStateConn := singularStateConn{st.MongoSession(), m}
  1747  	singularRunner, err := newSingularRunner(runner, singularStateConn)
  1748  	if err != nil {
  1749  		return nil, errors.Annotate(err, "cannot make singular state runner")
  1750  	}
  1751  	return singularRunner, nil
  1752  }
  1753  
  1754  // singularStateConn implements singular.Conn on
  1755  // top of a State connection.
  1756  type singularStateConn struct {
  1757  	session *mgo.Session
  1758  	machine *state.Machine
  1759  }
  1760  
  1761  func (c singularStateConn) IsMaster() (bool, error) {
  1762  	return mongo.IsMaster(c.session, c.machine)
  1763  }
  1764  
  1765  func (c singularStateConn) Ping() error {
  1766  	return c.session.Ping()
  1767  }
  1768  
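        // metricAPI returns a metrics manager client backed by the given API
        // connection.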
  1769  func metricAPI(st api.Connection) metricsmanager.MetricsManagerClient {
  1770  	return metricsmanager.NewClient(st)
  1771  }
  1772  
  1773  // newDeployContext gives the tests the opportunity to create a deployer.Context
  1774  // that can be used for testing. This avoids deploying units to the machine
  1775  // running the tests, and gives access to the *State used internally, so that
  1776  // tests can run without waiting for the 5s watcher refresh time to which we
  1777  // would otherwise be restricted.
  1778  var newDeployContext = func(st *apideployer.State, agentConfig agent.Config) deployer.Context {
  1779  	return deployer.NewSimpleContext(agentConfig, st)
  1780  }