github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/cmd/jujud/machine_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package main
     5  
     6  import (
     7  	"io/ioutil"
     8  	"os"
     9  	"path/filepath"
    10  	"reflect"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/juju/errors"
    16  	"github.com/juju/names"
    17  	jc "github.com/juju/testing/checkers"
    18  	"github.com/juju/utils/apt"
    19  	"github.com/juju/utils/proxy"
    20  	"github.com/juju/utils/set"
    21  	gc "launchpad.net/gocheck"
    22  
    23  	"github.com/juju/juju/agent"
    24  	"github.com/juju/juju/agent/mongo"
    25  	"github.com/juju/juju/charm"
    26  	"github.com/juju/juju/cmd"
    27  	lxctesting "github.com/juju/juju/container/lxc/testing"
    28  	"github.com/juju/juju/environs/config"
    29  	envtesting "github.com/juju/juju/environs/testing"
    30  	"github.com/juju/juju/instance"
    31  	"github.com/juju/juju/juju"
    32  	jujutesting "github.com/juju/juju/juju/testing"
    33  	"github.com/juju/juju/provider/dummy"
    34  	"github.com/juju/juju/state"
    35  	"github.com/juju/juju/state/api"
    36  	apideployer "github.com/juju/juju/state/api/deployer"
    37  	"github.com/juju/juju/state/api/params"
    38  	apirsyslog "github.com/juju/juju/state/api/rsyslog"
    39  	charmtesting "github.com/juju/juju/state/apiserver/charmrevisionupdater/testing"
    40  	"github.com/juju/juju/state/watcher"
    41  	coretesting "github.com/juju/juju/testing"
    42  	"github.com/juju/juju/tools"
    43  	"github.com/juju/juju/upstart"
    44  	"github.com/juju/juju/utils/ssh"
    45  	sshtesting "github.com/juju/juju/utils/ssh/testing"
    46  	"github.com/juju/juju/version"
    47  	"github.com/juju/juju/worker"
    48  	"github.com/juju/juju/worker/authenticationworker"
    49  	"github.com/juju/juju/worker/deployer"
    50  	"github.com/juju/juju/worker/instancepoller"
    51  	"github.com/juju/juju/worker/machineenvironmentworker"
    52  	"github.com/juju/juju/worker/rsyslog"
    53  	"github.com/juju/juju/worker/singular"
    54  	"github.com/juju/juju/worker/upgrader"
    55  )
    56  
// commonMachineSuite bundles the fixtures used by the machine agent tests
// in this file: the agent suite, the lxc testing suite, and the fakes that
// SetUpTest installs.
type commonMachineSuite struct {
	agentSuite
	// singularRecord is recreated by SetUpTest; its newSingularRunner is
	// patched over the package-level newSingularRunner.
	singularRecord *singularRunnerRecord
	lxctesting.TestSuite
	// fakeEnsureMongo provides the replacements that SetUpTest patches over
	// ensureMongoServer and maybeInitiateMongoServer.
	fakeEnsureMongo fakeEnsure
}
    63  
    64  func (s *commonMachineSuite) SetUpSuite(c *gc.C) {
    65  	s.agentSuite.SetUpSuite(c)
    66  	s.TestSuite.SetUpSuite(c)
    67  	s.agentSuite.PatchValue(&charm.CacheDir, c.MkDir())
    68  }
    69  
// TearDownSuite tears down the embedded suites, lxc testing suite first
// and agent suite last (the reverse of the order used by SetUpSuite).
func (s *commonMachineSuite) TearDownSuite(c *gc.C) {
	s.TestSuite.TearDownSuite(c)
	s.agentSuite.TearDownSuite(c)
}
    74  
    75  func (s *commonMachineSuite) SetUpTest(c *gc.C) {
    76  	s.agentSuite.SetUpTest(c)
    77  	s.TestSuite.SetUpTest(c)
    78  
    79  	os.Remove(jujuRun) // ignore error; may not exist
    80  	// Patch ssh user to avoid touching ~ubuntu/.ssh/authorized_keys.
    81  	s.agentSuite.PatchValue(&authenticationworker.SSHUser, "")
    82  
    83  	testpath := c.MkDir()
    84  	s.agentSuite.PatchEnvPathPrepend(testpath)
    85  	// mock out the start method so we can fake install services without sudo
    86  	fakeCmd(filepath.Join(testpath, "start"))
    87  	fakeCmd(filepath.Join(testpath, "stop"))
    88  
    89  	s.agentSuite.PatchValue(&upstart.InitDir, c.MkDir())
    90  
    91  	s.singularRecord = &singularRunnerRecord{}
    92  	s.agentSuite.PatchValue(&newSingularRunner, s.singularRecord.newSingularRunner)
    93  	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
    94  		return newDummyWorker(), nil
    95  	})
    96  
    97  	s.fakeEnsureMongo = fakeEnsure{}
    98  	s.agentSuite.PatchValue(&ensureMongoServer, s.fakeEnsureMongo.fakeEnsureMongo)
    99  	s.agentSuite.PatchValue(&maybeInitiateMongoServer, s.fakeEnsureMongo.fakeInitiateMongo)
   100  }
   101  
   102  func fakeCmd(path string) {
   103  	err := ioutil.WriteFile(path, []byte("#!/bin/bash --norc\nexit 0"), 0755)
   104  	if err != nil {
   105  		panic(err)
   106  	}
   107  }
   108  
// TearDownTest tears down the per-test state of the embedded suites, lxc
// testing suite first and agent suite last.
func (s *commonMachineSuite) TearDownTest(c *gc.C) {
	s.TestSuite.TearDownTest(c)
	s.agentSuite.TearDownTest(c)
}
   113  
   114  // primeAgent adds a new Machine to run the given jobs, and sets up the
   115  // machine agent's directory.  It returns the new machine, the
   116  // agent's configuration and the tools currently running.
   117  func (s *commonMachineSuite) primeAgent(
   118  	c *gc.C, vers version.Binary,
   119  	jobs ...state.MachineJob) (m *state.Machine, config agent.ConfigSetterWriter, tools *tools.Tools) {
   120  
   121  	// Add a machine and ensure it is provisioned.
   122  	m, err := s.State.AddMachine("quantal", jobs...)
   123  	c.Assert(err, gc.IsNil)
   124  	inst, md := jujutesting.AssertStartInstance(c, s.Conn.Environ, m.Id())
   125  	c.Assert(m.SetProvisioned(inst.Id(), state.BootstrapNonce, md), gc.IsNil)
   126  
   127  	// Add an address for the tests in case the maybeInitiateMongoServer
   128  	// codepath is exercised.
   129  	s.setFakeMachineAddresses(c, m)
   130  
   131  	// Set up the new machine.
   132  	err = m.SetAgentVersion(vers)
   133  	c.Assert(err, gc.IsNil)
   134  	err = m.SetPassword(initialMachinePassword)
   135  	c.Assert(err, gc.IsNil)
   136  	tag := names.MachineTag(m.Id())
   137  	if m.IsManager() {
   138  		err = m.SetMongoPassword(initialMachinePassword)
   139  		c.Assert(err, gc.IsNil)
   140  		config, tools = s.agentSuite.primeStateAgent(c, tag, initialMachinePassword, vers)
   141  		info, ok := config.StateServingInfo()
   142  		c.Assert(ok, jc.IsTrue)
   143  		err = s.State.SetStateServingInfo(info)
   144  		c.Assert(err, gc.IsNil)
   145  	} else {
   146  		config, tools = s.agentSuite.primeAgent(c, tag, initialMachinePassword, vers)
   147  	}
   148  	err = config.Write()
   149  	c.Assert(err, gc.IsNil)
   150  	return m, config, tools
   151  }
   152  
   153  // newAgent returns a new MachineAgent instance
   154  func (s *commonMachineSuite) newAgent(c *gc.C, m *state.Machine) *MachineAgent {
   155  	a := &MachineAgent{}
   156  	s.initAgent(c, a, "--machine-id", m.Id())
   157  	err := a.ReadConfig(m.Tag())
   158  	c.Assert(err, gc.IsNil)
   159  	return a
   160  }
   161  
   162  func (s *MachineSuite) TestParseSuccess(c *gc.C) {
   163  	create := func() (cmd.Command, *AgentConf) {
   164  		a := &MachineAgent{}
   165  		return a, &a.AgentConf
   166  	}
   167  	a := CheckAgentCommand(c, create, []string{"--machine-id", "42"})
   168  	c.Assert(a.(*MachineAgent).MachineId, gc.Equals, "42")
   169  }
   170  
// MachineSuite holds the machine agent tests; it builds on the fixtures
// provided by commonMachineSuite.
type MachineSuite struct {
	commonMachineSuite
}

// Register MachineSuite with gocheck.
var _ = gc.Suite(&MachineSuite{})

// initialMachinePassword is the password primeAgent sets on every machine
// it creates.
const initialMachinePassword = "machine-password-1234567890"
   178  
   179  func (s *MachineSuite) TestParseNonsense(c *gc.C) {
   180  	for _, args := range [][]string{
   181  		{},
   182  		{"--machine-id", "-4004"},
   183  	} {
   184  		err := ParseAgentCommand(&MachineAgent{}, args)
   185  		c.Assert(err, gc.ErrorMatches, "--machine-id option must be set, and expects a non-negative integer")
   186  	}
   187  }
   188  
   189  func (s *MachineSuite) TestParseUnknown(c *gc.C) {
   190  	a := &MachineAgent{}
   191  	err := ParseAgentCommand(a, []string{"--machine-id", "42", "blistering barnacles"})
   192  	c.Assert(err, gc.ErrorMatches, `unrecognized args: \["blistering barnacles"\]`)
   193  }
   194  
   195  func (s *MachineSuite) TestRunInvalidMachineId(c *gc.C) {
   196  	c.Skip("agents don't yet distinguish between temporary and permanent errors")
   197  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   198  	err := s.newAgent(c, m).Run(nil)
   199  	c.Assert(err, gc.ErrorMatches, "some error")
   200  }
   201  
   202  func (s *MachineSuite) TestRunStop(c *gc.C) {
   203  	m, ac, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   204  	a := s.newAgent(c, m)
   205  	done := make(chan error)
   206  	go func() {
   207  		done <- a.Run(nil)
   208  	}()
   209  	err := a.Stop()
   210  	c.Assert(err, gc.IsNil)
   211  	c.Assert(<-done, gc.IsNil)
   212  	c.Assert(charm.CacheDir, gc.Equals, filepath.Join(ac.DataDir(), "charmcache"))
   213  }
   214  
   215  func (s *MachineSuite) TestWithDeadMachine(c *gc.C) {
   216  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   217  	err := m.EnsureDead()
   218  	c.Assert(err, gc.IsNil)
   219  	a := s.newAgent(c, m)
   220  	err = runWithTimeout(a)
   221  	c.Assert(err, gc.IsNil)
   222  }
   223  
   224  func (s *MachineSuite) TestWithRemovedMachine(c *gc.C) {
   225  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   226  	err := m.EnsureDead()
   227  	c.Assert(err, gc.IsNil)
   228  	err = m.Remove()
   229  	c.Assert(err, gc.IsNil)
   230  	a := s.newAgent(c, m)
   231  	err = runWithTimeout(a)
   232  	c.Assert(err, gc.IsNil)
   233  }
   234  
   235  func (s *MachineSuite) TestDyingMachine(c *gc.C) {
   236  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   237  	a := s.newAgent(c, m)
   238  	done := make(chan error)
   239  	go func() {
   240  		done <- a.Run(nil)
   241  	}()
   242  	defer func() {
   243  		c.Check(a.Stop(), gc.IsNil)
   244  	}()
   245  	err := m.Destroy()
   246  	c.Assert(err, gc.IsNil)
   247  	select {
   248  	case err := <-done:
   249  		c.Assert(err, gc.IsNil)
   250  	case <-time.After(watcher.Period * 5 / 4):
   251  		// TODO(rog) Fix this so it doesn't wait for so long.
   252  		// https://bugs.github.com/juju/juju/+bug/1163983
   253  		c.Fatalf("timed out waiting for agent to terminate")
   254  	}
   255  	err = m.Refresh()
   256  	c.Assert(err, gc.IsNil)
   257  	c.Assert(m.Life(), gc.Equals, state.Dead)
   258  }
   259  
   260  func (s *MachineSuite) TestHostUnits(c *gc.C) {
   261  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   262  	a := s.newAgent(c, m)
   263  	ctx, reset := patchDeployContext(c, s.BackingState)
   264  	defer reset()
   265  	go func() { c.Check(a.Run(nil), gc.IsNil) }()
   266  	defer func() { c.Check(a.Stop(), gc.IsNil) }()
   267  
   268  	// check that unassigned units don't trigger any deployments.
   269  	svc := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
   270  	u0, err := svc.AddUnit()
   271  	c.Assert(err, gc.IsNil)
   272  	u1, err := svc.AddUnit()
   273  	c.Assert(err, gc.IsNil)
   274  
   275  	ctx.waitDeployed(c)
   276  
   277  	// assign u0, check it's deployed.
   278  	err = u0.AssignToMachine(m)
   279  	c.Assert(err, gc.IsNil)
   280  	ctx.waitDeployed(c, u0.Name())
   281  
   282  	// "start the agent" for u0 to prevent short-circuited remove-on-destroy;
   283  	// check that it's kept deployed despite being Dying.
   284  	err = u0.SetStatus(params.StatusStarted, "", nil)
   285  	c.Assert(err, gc.IsNil)
   286  	err = u0.Destroy()
   287  	c.Assert(err, gc.IsNil)
   288  	ctx.waitDeployed(c, u0.Name())
   289  
   290  	// add u1 to the machine, check it's deployed.
   291  	err = u1.AssignToMachine(m)
   292  	c.Assert(err, gc.IsNil)
   293  	ctx.waitDeployed(c, u0.Name(), u1.Name())
   294  
   295  	// make u0 dead; check the deployer recalls the unit and removes it from
   296  	// state.
   297  	err = u0.EnsureDead()
   298  	c.Assert(err, gc.IsNil)
   299  	ctx.waitDeployed(c, u1.Name())
   300  
   301  	// The deployer actually removes the unit just after
   302  	// removing its deployment, so we need to poll here
   303  	// until it actually happens.
   304  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   305  		err := u0.Refresh()
   306  		if err == nil && attempt.HasNext() {
   307  			continue
   308  		}
   309  		c.Assert(err, jc.Satisfies, errors.IsNotFound)
   310  	}
   311  
   312  	// short-circuit-remove u1 after it's been deployed; check it's recalled
   313  	// and removed from state.
   314  	err = u1.Destroy()
   315  	c.Assert(err, gc.IsNil)
   316  	err = u1.Refresh()
   317  	c.Assert(err, jc.Satisfies, errors.IsNotFound)
   318  	ctx.waitDeployed(c)
   319  }
   320  
   321  func patchDeployContext(c *gc.C, st *state.State) (*fakeContext, func()) {
   322  	ctx := &fakeContext{
   323  		inited: make(chan struct{}),
   324  	}
   325  	orig := newDeployContext
   326  	newDeployContext = func(dst *apideployer.State, agentConfig agent.Config) deployer.Context {
   327  		ctx.st = st
   328  		ctx.agentConfig = agentConfig
   329  		close(ctx.inited)
   330  		return ctx
   331  	}
   332  	return ctx, func() { newDeployContext = orig }
   333  }
   334  
   335  func (s *commonMachineSuite) setFakeMachineAddresses(c *gc.C, machine *state.Machine) {
   336  	addrs := []instance.Address{
   337  		instance.NewAddress("0.1.2.3", instance.NetworkUnknown),
   338  	}
   339  	err := machine.SetAddresses(addrs...)
   340  	c.Assert(err, gc.IsNil)
   341  	// Set the addresses in the environ instance as well so that if the instance poller
   342  	// runs it won't overwrite them.
   343  	instId, err := machine.InstanceId()
   344  	c.Assert(err, gc.IsNil)
   345  	insts, err := s.Conn.Environ.Instances([]instance.Id{instId})
   346  	c.Assert(err, gc.IsNil)
   347  	dummy.SetInstanceAddresses(insts[0], addrs)
   348  }
   349  
   350  func (s *MachineSuite) TestManageEnviron(c *gc.C) {
   351  	usefulVersion := version.Current
   352  	usefulVersion.Series = "quantal" // to match the charm created below
   353  	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
   354  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   355  	op := make(chan dummy.Operation, 200)
   356  	dummy.Listen(op)
   357  
   358  	a := s.newAgent(c, m)
   359  	// Make sure the agent is stopped even if the test fails.
   360  	defer a.Stop()
   361  	done := make(chan error)
   362  	go func() {
   363  		done <- a.Run(nil)
   364  	}()
   365  
   366  	// Check that the provisioner and firewaller are alive by doing
   367  	// a rudimentary check that it responds to state changes.
   368  
   369  	// Add one unit to a service; it should get allocated a machine
   370  	// and then its ports should be opened.
   371  	charm := s.AddTestingCharm(c, "dummy")
   372  	svc := s.AddTestingService(c, "test-service", charm)
   373  	err := svc.SetExposed()
   374  	c.Assert(err, gc.IsNil)
   375  	units, err := juju.AddUnits(s.State, svc, 1, "")
   376  	c.Assert(err, gc.IsNil)
   377  	c.Check(opRecvTimeout(c, s.State, op, dummy.OpStartInstance{}), gc.NotNil)
   378  
   379  	// Wait for the instance id to show up in the state.
   380  	s.waitProvisioned(c, units[0])
   381  	err = units[0].OpenPort("tcp", 999)
   382  	c.Assert(err, gc.IsNil)
   383  
   384  	c.Check(opRecvTimeout(c, s.State, op, dummy.OpOpenPorts{}), gc.NotNil)
   385  
   386  	err = a.Stop()
   387  	c.Assert(err, gc.IsNil)
   388  
   389  	select {
   390  	case err := <-done:
   391  		c.Assert(err, gc.IsNil)
   392  	case <-time.After(5 * time.Second):
   393  		c.Fatalf("timed out waiting for agent to terminate")
   394  	}
   395  
   396  	c.Assert(s.singularRecord.started(), jc.DeepEquals, []string{
   397  		"charm-revision-updater",
   398  		"cleaner",
   399  		"environ-provisioner",
   400  		"firewaller",
   401  		"minunitsworker",
   402  		"resumer",
   403  	})
   404  }
   405  
   406  func (s *MachineSuite) TestManageEnvironRunsInstancePoller(c *gc.C) {
   407  	s.agentSuite.PatchValue(&instancepoller.ShortPoll, 500*time.Millisecond)
   408  	usefulVersion := version.Current
   409  	usefulVersion.Series = "quantal" // to match the charm created below
   410  	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
   411  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   412  	a := s.newAgent(c, m)
   413  	defer a.Stop()
   414  	go func() {
   415  		c.Check(a.Run(nil), gc.IsNil)
   416  	}()
   417  
   418  	// Add one unit to a service;
   419  	charm := s.AddTestingCharm(c, "dummy")
   420  	svc := s.AddTestingService(c, "test-service", charm)
   421  	units, err := juju.AddUnits(s.State, svc, 1, "")
   422  	c.Assert(err, gc.IsNil)
   423  
   424  	m, instId := s.waitProvisioned(c, units[0])
   425  	insts, err := s.Conn.Environ.Instances([]instance.Id{instId})
   426  	c.Assert(err, gc.IsNil)
   427  	addrs := []instance.Address{instance.NewAddress("1.2.3.4", instance.NetworkUnknown)}
   428  	dummy.SetInstanceAddresses(insts[0], addrs)
   429  	dummy.SetInstanceStatus(insts[0], "running")
   430  
   431  	for a := coretesting.LongAttempt.Start(); a.Next(); {
   432  		if !a.HasNext() {
   433  			c.Logf("final machine addresses: %#v", m.Addresses())
   434  			c.Fatalf("timed out waiting for machine to get address")
   435  		}
   436  		err := m.Refresh()
   437  		c.Assert(err, gc.IsNil)
   438  		instStatus, err := m.InstanceStatus()
   439  		c.Assert(err, gc.IsNil)
   440  		if reflect.DeepEqual(m.Addresses(), addrs) && instStatus == "running" {
   441  			break
   442  		}
   443  	}
   444  }
   445  
   446  func (s *MachineSuite) TestManageEnvironRunsPeergrouper(c *gc.C) {
   447  	started := make(chan struct{}, 1)
   448  	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
   449  		c.Check(st, gc.NotNil)
   450  		select {
   451  		case started <- struct{}{}:
   452  		default:
   453  		}
   454  		return newDummyWorker(), nil
   455  	})
   456  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   457  	a := s.newAgent(c, m)
   458  	defer a.Stop()
   459  	go func() {
   460  		c.Check(a.Run(nil), gc.IsNil)
   461  	}()
   462  	select {
   463  	case <-started:
   464  	case <-time.After(coretesting.LongWait):
   465  		c.Fatalf("timed out waiting for peergrouper worker to be started")
   466  	}
   467  }
   468  
   469  func (s *MachineSuite) TestEnsureLocalEnvironDoesntRunPeergrouper(c *gc.C) {
   470  	started := make(chan struct{}, 1)
   471  	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
   472  		c.Check(st, gc.NotNil)
   473  		select {
   474  		case started <- struct{}{}:
   475  		default:
   476  		}
   477  		return newDummyWorker(), nil
   478  	})
   479  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   480  	a := s.newAgent(c, m)
   481  	err := a.ChangeConfig(func(config agent.ConfigSetter) {
   482  		config.SetValue(agent.ProviderType, "local")
   483  	})
   484  	c.Assert(err, gc.IsNil)
   485  	defer func() { c.Check(a.Stop(), gc.IsNil) }()
   486  	go func() {
   487  		c.Check(a.Run(nil), gc.IsNil)
   488  	}()
   489  	select {
   490  	case <-started:
   491  		c.Fatalf("local environment should not start peergrouper")
   492  	case <-time.After(coretesting.ShortWait):
   493  	}
   494  }
   495  
   496  func (s *MachineSuite) TestManageEnvironCallsUseMultipleCPUs(c *gc.C) {
   497  	// If it has been enabled, the JobManageEnviron agent should call utils.UseMultipleCPUs
   498  	usefulVersion := version.Current
   499  	usefulVersion.Series = "quantal"
   500  	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
   501  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   502  	calledChan := make(chan struct{}, 1)
   503  	s.agentSuite.PatchValue(&useMultipleCPUs, func() { calledChan <- struct{}{} })
   504  	// Now, start the agent, and observe that a JobManageEnviron agent
   505  	// calls UseMultipleCPUs
   506  	a := s.newAgent(c, m)
   507  	defer a.Stop()
   508  	go func() {
   509  		c.Check(a.Run(nil), gc.IsNil)
   510  	}()
   511  	// Wait for configuration to be finished
   512  	<-a.WorkersStarted()
   513  	select {
   514  	case <-calledChan:
   515  	case <-time.After(coretesting.LongWait):
   516  		c.Errorf("we failed to call UseMultipleCPUs()")
   517  	}
   518  	c.Check(a.Stop(), gc.IsNil)
   519  	// However, an agent that just JobHostUnits doesn't call UseMultipleCPUs
   520  	m2, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   521  	a2 := s.newAgent(c, m2)
   522  	defer a2.Stop()
   523  	go func() {
   524  		c.Check(a2.Run(nil), gc.IsNil)
   525  	}()
   526  	// Wait until all the workers have been started, and then kill everything
   527  	<-a2.workersStarted
   528  	c.Check(a2.Stop(), gc.IsNil)
   529  	select {
   530  	case <-calledChan:
   531  		c.Errorf("we should not have called UseMultipleCPUs()")
   532  	case <-time.After(coretesting.ShortWait):
   533  	}
   534  }
   535  
   536  func (s *MachineSuite) waitProvisioned(c *gc.C, unit *state.Unit) (*state.Machine, instance.Id) {
   537  	c.Logf("waiting for unit %q to be provisioned", unit)
   538  	machineId, err := unit.AssignedMachineId()
   539  	c.Assert(err, gc.IsNil)
   540  	m, err := s.State.Machine(machineId)
   541  	c.Assert(err, gc.IsNil)
   542  	w := m.Watch()
   543  	defer w.Stop()
   544  	timeout := time.After(coretesting.LongWait)
   545  	for {
   546  		select {
   547  		case <-timeout:
   548  			c.Fatalf("timed out waiting for provisioning")
   549  		case _, ok := <-w.Changes():
   550  			c.Assert(ok, jc.IsTrue)
   551  			err := m.Refresh()
   552  			c.Assert(err, gc.IsNil)
   553  			if instId, err := m.InstanceId(); err == nil {
   554  				c.Logf("unit provisioned with instance %s", instId)
   555  				return m, instId
   556  			} else {
   557  				c.Check(err, jc.Satisfies, state.IsNotProvisionedError)
   558  			}
   559  		}
   560  	}
   561  	panic("watcher died")
   562  }
   563  
   564  func (s *MachineSuite) testUpgradeRequest(c *gc.C, agent runner, tag string, currentTools *tools.Tools) {
   565  	newVers := version.Current
   566  	newVers.Patch++
   567  	newTools := envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), newVers)[0]
   568  	err := s.State.SetEnvironAgentVersion(newVers.Number)
   569  	c.Assert(err, gc.IsNil)
   570  	err = runWithTimeout(agent)
   571  	envtesting.CheckUpgraderReadyError(c, err, &upgrader.UpgradeReadyError{
   572  		AgentName: tag,
   573  		OldTools:  currentTools.Version,
   574  		NewTools:  newTools.Version,
   575  		DataDir:   s.DataDir(),
   576  	})
   577  }
   578  
   579  func (s *MachineSuite) TestUpgradeRequest(c *gc.C) {
   580  	m, _, currentTools := s.primeAgent(c, version.Current, state.JobManageEnviron, state.JobHostUnits)
   581  	a := s.newAgent(c, m)
   582  	s.testUpgradeRequest(c, a, m.Tag(), currentTools)
   583  }
   584  
// fastDialOpts are the API dial options used by tests in this file when
// opening an API connection: the timeout is coretesting.LongWait and the
// retry delay is coretesting.ShortWait.
var fastDialOpts = api.DialOpts{
	Timeout:    coretesting.LongWait,
	RetryDelay: coretesting.ShortWait,
}
   589  
   590  func (s *MachineSuite) waitStopped(c *gc.C, job state.MachineJob, a *MachineAgent, done chan error) {
   591  	err := a.Stop()
   592  	if job == state.JobManageEnviron {
   593  		// When shutting down, the API server can be shut down before
   594  		// the other workers that connect to it, so they get an error so
   595  		// they then die, causing Stop to return an error.  It's not
   596  		// easy to control the actual error that's received in this
   597  		// circumstance so we just log it rather than asserting that it
   598  		// is not nil.
   599  		if err != nil {
   600  			c.Logf("error shutting down state manager: %v", err)
   601  		}
   602  	} else {
   603  		c.Assert(err, gc.IsNil)
   604  	}
   605  
   606  	select {
   607  	case err := <-done:
   608  		c.Assert(err, gc.IsNil)
   609  	case <-time.After(5 * time.Second):
   610  		c.Fatalf("timed out waiting for agent to terminate")
   611  	}
   612  }
   613  
   614  func (s *MachineSuite) assertJobWithAPI(
   615  	c *gc.C,
   616  	job state.MachineJob,
   617  	test func(agent.Config, *api.State),
   618  ) {
   619  	s.assertAgentOpensState(c, &reportOpenedAPI, job, func(cfg agent.Config, st eitherState) {
   620  		test(cfg, st.(*api.State))
   621  	})
   622  }
   623  
   624  func (s *MachineSuite) assertJobWithState(
   625  	c *gc.C,
   626  	job state.MachineJob,
   627  	test func(agent.Config, *state.State),
   628  ) {
   629  	paramsJob := job.ToParams()
   630  	if !paramsJob.NeedsState() {
   631  		c.Fatalf("%v does not use state", paramsJob)
   632  	}
   633  	s.assertAgentOpensState(c, &reportOpenedState, job, func(cfg agent.Config, st eitherState) {
   634  		test(cfg, st.(*state.State))
   635  	})
   636  }
   637  
   638  // assertAgentOpensState asserts that a machine agent started with the
   639  // given job will call the function pointed to by reportOpened. The
   640  // agent's configuration and the value passed to reportOpened are then
   641  // passed to the test function for further checking.
   642  func (s *MachineSuite) assertAgentOpensState(
   643  	c *gc.C,
   644  	reportOpened *func(eitherState),
   645  	job state.MachineJob,
   646  	test func(agent.Config, eitherState),
   647  ) {
   648  	stm, conf, _ := s.primeAgent(c, version.Current, job)
   649  	a := s.newAgent(c, stm)
   650  	defer a.Stop()
   651  
   652  	// All state jobs currently also run an APIWorker, so no
   653  	// need to check for that here, like in assertJobWithState.
   654  
   655  	agentAPIs := make(chan eitherState, 1)
   656  	s.agentSuite.PatchValue(reportOpened, func(st eitherState) {
   657  		select {
   658  		case agentAPIs <- st:
   659  		default:
   660  		}
   661  	})
   662  
   663  	done := make(chan error)
   664  	go func() {
   665  		done <- a.Run(nil)
   666  	}()
   667  
   668  	select {
   669  	case agentAPI := <-agentAPIs:
   670  		c.Assert(agentAPI, gc.NotNil)
   671  		test(conf, agentAPI)
   672  	case <-time.After(coretesting.LongWait):
   673  		c.Fatalf("API not opened")
   674  	}
   675  
   676  	s.waitStopped(c, job, a, done)
   677  }
   678  
   679  func (s *MachineSuite) TestManageEnvironServesAPI(c *gc.C) {
   680  	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
   681  		st, err := api.Open(conf.APIInfo(), fastDialOpts)
   682  		c.Assert(err, gc.IsNil)
   683  		defer st.Close()
   684  		m, err := st.Machiner().Machine(conf.Tag())
   685  		c.Assert(err, gc.IsNil)
   686  		c.Assert(m.Life(), gc.Equals, params.Alive)
   687  	})
   688  }
   689  
   690  func (s *MachineSuite) TestManageEnvironRunsCleaner(c *gc.C) {
   691  	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
   692  		// Create a service and unit, and destroy the service.
   693  		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
   694  		unit, err := service.AddUnit()
   695  		c.Assert(err, gc.IsNil)
   696  		err = service.Destroy()
   697  		c.Assert(err, gc.IsNil)
   698  
   699  		// Check the unit was not yet removed.
   700  		err = unit.Refresh()
   701  		c.Assert(err, gc.IsNil)
   702  		w := unit.Watch()
   703  		defer w.Stop()
   704  
   705  		// Trigger a sync on the state used by the agent, and wait
   706  		// for the unit to be removed.
   707  		agentState.StartSync()
   708  		timeout := time.After(coretesting.LongWait)
   709  		for done := false; !done; {
   710  			select {
   711  			case <-timeout:
   712  				c.Fatalf("unit not cleaned up")
   713  			case <-time.After(coretesting.ShortWait):
   714  				s.State.StartSync()
   715  			case <-w.Changes():
   716  				err := unit.Refresh()
   717  				if errors.IsNotFound(err) {
   718  					done = true
   719  				} else {
   720  					c.Assert(err, gc.IsNil)
   721  				}
   722  			}
   723  		}
   724  	})
   725  }
   726  
   727  func (s *MachineSuite) TestJobManageEnvironRunsMinUnitsWorker(c *gc.C) {
   728  	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
   729  		// Ensure that the MinUnits worker is alive by doing a simple check
   730  		// that it responds to state changes: add a service, set its minimum
   731  		// number of units to one, wait for the worker to add the missing unit.
   732  		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
   733  		err := service.SetMinUnits(1)
   734  		c.Assert(err, gc.IsNil)
   735  		w := service.Watch()
   736  		defer w.Stop()
   737  
   738  		// Trigger a sync on the state used by the agent, and wait for the unit
   739  		// to be created.
   740  		agentState.StartSync()
   741  		timeout := time.After(coretesting.LongWait)
   742  		for {
   743  			select {
   744  			case <-timeout:
   745  				c.Fatalf("unit not created")
   746  			case <-time.After(coretesting.ShortWait):
   747  				s.State.StartSync()
   748  			case <-w.Changes():
   749  				units, err := service.AllUnits()
   750  				c.Assert(err, gc.IsNil)
   751  				if len(units) == 1 {
   752  					return
   753  				}
   754  			}
   755  		}
   756  	})
   757  }
   758  
   759  func (s *MachineSuite) TestMachineAgentRunsAuthorisedKeysWorker(c *gc.C) {
   760  	// Start the machine agent.
   761  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   762  	a := s.newAgent(c, m)
   763  	go func() { c.Check(a.Run(nil), gc.IsNil) }()
   764  	defer func() { c.Check(a.Stop(), gc.IsNil) }()
   765  
   766  	// Update the keys in the environment.
   767  	sshKey := sshtesting.ValidKeyOne.Key + " user@host"
   768  	err := s.BackingState.UpdateEnvironConfig(map[string]interface{}{"authorized-keys": sshKey}, nil, nil)
   769  	c.Assert(err, gc.IsNil)
   770  
   771  	// Wait for ssh keys file to be updated.
   772  	s.State.StartSync()
   773  	timeout := time.After(coretesting.LongWait)
   774  	sshKeyWithCommentPrefix := sshtesting.ValidKeyOne.Key + " Juju:user@host"
   775  	for {
   776  		select {
   777  		case <-timeout:
   778  			c.Fatalf("timeout while waiting for authorised ssh keys to change")
   779  		case <-time.After(coretesting.ShortWait):
   780  			keys, err := ssh.ListKeys(authenticationworker.SSHUser, ssh.FullKeys)
   781  			c.Assert(err, gc.IsNil)
   782  			keysStr := strings.Join(keys, "\n")
   783  			if sshKeyWithCommentPrefix != keysStr {
   784  				continue
   785  			}
   786  			return
   787  		}
   788  	}
   789  }
   790  
   791  // opRecvTimeout waits for any of the given kinds of operation to
   792  // be received from ops, and times out if not.
   793  func opRecvTimeout(c *gc.C, st *state.State, opc <-chan dummy.Operation, kinds ...dummy.Operation) dummy.Operation {
   794  	st.StartSync()
   795  	for {
   796  		select {
   797  		case op := <-opc:
   798  			for _, k := range kinds {
   799  				if reflect.TypeOf(op) == reflect.TypeOf(k) {
   800  					return op
   801  				}
   802  			}
   803  			c.Logf("discarding unknown event %#v", op)
   804  		case <-time.After(15 * time.Second):
   805  			c.Fatalf("time out wating for operation")
   806  		}
   807  	}
   808  }
   809  
   810  func (s *MachineSuite) TestOpenStateFailsForJobHostUnitsButOpenAPIWorks(c *gc.C) {
   811  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   812  	s.testOpenAPIState(c, m, s.newAgent(c, m), initialMachinePassword)
   813  	s.assertJobWithAPI(c, state.JobHostUnits, func(conf agent.Config, st *api.State) {
   814  		s.assertCannotOpenState(c, conf.Tag(), conf.DataDir())
   815  	})
   816  }
   817  
   818  func (s *MachineSuite) TestOpenStateWorksForJobManageEnviron(c *gc.C) {
   819  	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
   820  		s.assertCanOpenState(c, conf.Tag(), conf.DataDir())
   821  	})
   822  }
   823  
   824  func (s *MachineSuite) TestMachineAgentSymlinkJujuRun(c *gc.C) {
   825  	_, err := os.Stat(jujuRun)
   826  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   827  	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
   828  		// juju-run should have been created
   829  		_, err := os.Stat(jujuRun)
   830  		c.Assert(err, gc.IsNil)
   831  	})
   832  }
   833  
   834  func (s *MachineSuite) TestMachineAgentSymlinkJujuRunExists(c *gc.C) {
   835  	err := os.Symlink("/nowhere/special", jujuRun)
   836  	c.Assert(err, gc.IsNil)
   837  	_, err = os.Stat(jujuRun)
   838  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   839  	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
   840  		// juju-run should have been recreated
   841  		_, err := os.Stat(jujuRun)
   842  		c.Assert(err, gc.IsNil)
   843  		link, err := os.Readlink(jujuRun)
   844  		c.Assert(err, gc.IsNil)
   845  		c.Assert(link, gc.Not(gc.Equals), "/nowhere/special")
   846  	})
   847  }
   848  
   849  func (s *MachineSuite) TestMachineEnvironWorker(c *gc.C) {
   850  	proxyDir := c.MkDir()
   851  	s.agentSuite.PatchValue(&machineenvironmentworker.ProxyDirectory, proxyDir)
   852  	s.agentSuite.PatchValue(&apt.ConfFile, filepath.Join(proxyDir, "juju-apt-proxy"))
   853  
   854  	s.primeAgent(c, version.Current, state.JobHostUnits)
   855  	// Make sure there are some proxy settings to write.
   856  	proxySettings := proxy.Settings{
   857  		Http:  "http proxy",
   858  		Https: "https proxy",
   859  		Ftp:   "ftp proxy",
   860  	}
   861  
   862  	updateAttrs := config.ProxyConfigMap(proxySettings)
   863  
   864  	err := s.State.UpdateEnvironConfig(updateAttrs, nil, nil)
   865  	c.Assert(err, gc.IsNil)
   866  
   867  	s.assertJobWithAPI(c, state.JobHostUnits, func(conf agent.Config, st *api.State) {
   868  		for {
   869  			select {
   870  			case <-time.After(coretesting.LongWait):
   871  				c.Fatalf("timeout while waiting for proxy settings to change")
   872  			case <-time.After(10 * time.Millisecond):
   873  				_, err := os.Stat(apt.ConfFile)
   874  				if os.IsNotExist(err) {
   875  					continue
   876  				}
   877  				c.Assert(err, gc.IsNil)
   878  				return
   879  			}
   880  		}
   881  	})
   882  }
   883  
   884  func (s *MachineSuite) TestMachineAgentUninstall(c *gc.C) {
   885  	m, ac, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   886  	err := m.EnsureDead()
   887  	c.Assert(err, gc.IsNil)
   888  	a := s.newAgent(c, m)
   889  	err = runWithTimeout(a)
   890  	c.Assert(err, gc.IsNil)
   891  	// juju-run should have been removed on termination
   892  	_, err = os.Stat(jujuRun)
   893  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   894  	// data-dir should have been removed on termination
   895  	_, err = os.Stat(ac.DataDir())
   896  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   897  }
   898  
   899  func (s *MachineSuite) TestMachineAgentRsyslogManageEnviron(c *gc.C) {
   900  	s.testMachineAgentRsyslogConfigWorker(c, state.JobManageEnviron, rsyslog.RsyslogModeAccumulate)
   901  }
   902  
   903  func (s *MachineSuite) TestMachineAgentRsyslogHostUnits(c *gc.C) {
   904  	s.testMachineAgentRsyslogConfigWorker(c, state.JobHostUnits, rsyslog.RsyslogModeForwarding)
   905  }
   906  
   907  func (s *MachineSuite) testMachineAgentRsyslogConfigWorker(c *gc.C, job state.MachineJob, expectedMode rsyslog.RsyslogMode) {
   908  	created := make(chan rsyslog.RsyslogMode, 1)
   909  	s.agentSuite.PatchValue(&newRsyslogConfigWorker, func(_ *apirsyslog.State, _ agent.Config, mode rsyslog.RsyslogMode) (worker.Worker, error) {
   910  		created <- mode
   911  		return newDummyWorker(), nil
   912  	})
   913  	s.assertJobWithAPI(c, job, func(conf agent.Config, st *api.State) {
   914  		select {
   915  		case <-time.After(coretesting.LongWait):
   916  			c.Fatalf("timeout while waiting for rsyslog worker to be created")
   917  		case mode := <-created:
   918  			c.Assert(mode, gc.Equals, expectedMode)
   919  		}
   920  	})
   921  }
   922  
   923  func (s *MachineSuite) TestMachineAgentRunsAPIAddressUpdaterWorker(c *gc.C) {
   924  	// Start the machine agent.
   925  	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
   926  	a := s.newAgent(c, m)
   927  	go func() { c.Check(a.Run(nil), gc.IsNil) }()
   928  	defer func() { c.Check(a.Stop(), gc.IsNil) }()
   929  
   930  	// Update the API addresses.
   931  	updatedServers := [][]instance.HostPort{instance.AddressesWithPort(
   932  		instance.NewAddresses("localhost"), 1234,
   933  	)}
   934  	err := s.BackingState.SetAPIHostPorts(updatedServers)
   935  	c.Assert(err, gc.IsNil)
   936  
   937  	// Wait for config to be updated.
   938  	s.BackingState.StartSync()
   939  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   940  		addrs, err := a.CurrentConfig().APIAddresses()
   941  		c.Assert(err, gc.IsNil)
   942  		if reflect.DeepEqual(addrs, []string{"localhost:1234"}) {
   943  			return
   944  		}
   945  	}
   946  	c.Fatalf("timeout while waiting for agent config to change")
   947  }
   948  
   949  func (s *MachineSuite) TestMachineAgentUpgradeMongo(c *gc.C) {
   950  	m, agentConfig, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
   951  	agentConfig.SetUpgradedToVersion(version.MustParse("1.18.0"))
   952  	err := agentConfig.Write()
   953  	c.Assert(err, gc.IsNil)
   954  	err = s.State.MongoSession().DB("admin").RemoveUser(m.Tag())
   955  	c.Assert(err, gc.IsNil)
   956  
   957  	s.agentSuite.PatchValue(&ensureMongoAdminUser, func(p mongo.EnsureAdminUserParams) (bool, error) {
   958  		err := s.State.MongoSession().DB("admin").AddUser(p.User, p.Password, false)
   959  		c.Assert(err, gc.IsNil)
   960  		return true, nil
   961  	})
   962  
   963  	stateOpened := make(chan eitherState, 1)
   964  	s.agentSuite.PatchValue(&reportOpenedState, func(st eitherState) {
   965  		select {
   966  		case stateOpened <- st:
   967  		default:
   968  		}
   969  	})
   970  
   971  	// Start the machine agent, and wait for state to be opened.
   972  	a := s.newAgent(c, m)
   973  	done := make(chan error)
   974  	go func() { done <- a.Run(nil) }()
   975  	defer a.Stop() // in case of failure
   976  	select {
   977  	case st := <-stateOpened:
   978  		c.Assert(st, gc.NotNil)
   979  	case <-time.After(coretesting.LongWait):
   980  		c.Fatalf("state not opened")
   981  	}
   982  	s.waitStopped(c, state.JobManageEnviron, a, done)
   983  	c.Assert(s.fakeEnsureMongo.ensureCount, gc.Equals, 1)
   984  	c.Assert(s.fakeEnsureMongo.initiateCount, gc.Equals, 1)
   985  }
   986  
   987  // MachineWithCharmsSuite provides infrastructure for tests which need to
   988  // work with charms.
   989  type MachineWithCharmsSuite struct {
   990  	commonMachineSuite
   991  	charmtesting.CharmSuite
   992  
   993  	machine *state.Machine
   994  }
   995  
   996  var _ = gc.Suite(&MachineWithCharmsSuite{})
   997  
   998  func (s *MachineWithCharmsSuite) SetUpSuite(c *gc.C) {
   999  	s.commonMachineSuite.SetUpSuite(c)
  1000  	s.CharmSuite.SetUpSuite(c, &s.commonMachineSuite.JujuConnSuite)
  1001  }
  1002  
  1003  func (s *MachineWithCharmsSuite) TearDownSuite(c *gc.C) {
  1004  	s.commonMachineSuite.TearDownSuite(c)
  1005  	s.CharmSuite.TearDownSuite(c)
  1006  }
  1007  
  1008  func (s *MachineWithCharmsSuite) SetUpTest(c *gc.C) {
  1009  	s.commonMachineSuite.SetUpTest(c)
  1010  	s.CharmSuite.SetUpTest(c)
  1011  }
  1012  
  1013  func (s *MachineWithCharmsSuite) TearDownTest(c *gc.C) {
  1014  	s.commonMachineSuite.TearDownTest(c)
  1015  	s.CharmSuite.TearDownTest(c)
  1016  }
  1017  
  1018  func (s *MachineWithCharmsSuite) TestManageEnvironRunsCharmRevisionUpdater(c *gc.C) {
  1019  	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
  1020  
  1021  	s.SetupScenario(c)
  1022  
  1023  	a := s.newAgent(c, m)
  1024  	go func() {
  1025  		c.Check(a.Run(nil), gc.IsNil)
  1026  	}()
  1027  	defer func() { c.Check(a.Stop(), gc.IsNil) }()
  1028  
  1029  	checkRevision := func() bool {
  1030  		curl := charm.MustParseURL("cs:quantal/mysql")
  1031  		placeholder, err := s.State.LatestPlaceholderCharm(curl)
  1032  		return err == nil && placeholder.String() == curl.WithRevision(23).String()
  1033  	}
  1034  	success := false
  1035  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
  1036  		if success = checkRevision(); success {
  1037  			break
  1038  		}
  1039  	}
  1040  	c.Assert(success, gc.Equals, true)
  1041  }
  1042  
  1043  type singularRunnerRecord struct {
  1044  	mu             sync.Mutex
  1045  	startedWorkers set.Strings
  1046  }
  1047  
  1048  func (r *singularRunnerRecord) newSingularRunner(runner worker.Runner, conn singular.Conn) (worker.Runner, error) {
  1049  	sr, err := singular.New(runner, conn)
  1050  	if err != nil {
  1051  		return nil, err
  1052  	}
  1053  	return &fakeSingularRunner{
  1054  		Runner: sr,
  1055  		record: r,
  1056  	}, nil
  1057  }
  1058  
  1059  // started returns the names of all singular-started workers.
  1060  func (r *singularRunnerRecord) started() []string {
  1061  	return r.startedWorkers.SortedValues()
  1062  }
  1063  
  1064  type fakeSingularRunner struct {
  1065  	worker.Runner
  1066  	record *singularRunnerRecord
  1067  }
  1068  
  1069  func (r *fakeSingularRunner) StartWorker(name string, start func() (worker.Worker, error)) error {
  1070  	r.record.mu.Lock()
  1071  	defer r.record.mu.Unlock()
  1072  	r.record.startedWorkers.Add(name)
  1073  	return r.Runner.StartWorker(name, start)
  1074  }
  1075  
  1076  func newDummyWorker() worker.Worker {
  1077  	return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
  1078  		<-stop
  1079  		return nil
  1080  	})
  1081  }