github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/cmd/jujud/machine_test.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package main

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"reflect"
	"strings"
	"sync"
	"time"

	"github.com/juju/charm"
	"github.com/juju/cmd"
	"github.com/juju/errors"
	"github.com/juju/names"
	jc "github.com/juju/testing/checkers"
	"github.com/juju/utils/apt"
	"github.com/juju/utils/proxy"
	"github.com/juju/utils/set"
	gc "launchpad.net/gocheck"

	"github.com/juju/juju/agent"
	lxctesting "github.com/juju/juju/container/lxc/testing"
	"github.com/juju/juju/environs/config"
	envtesting "github.com/juju/juju/environs/testing"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/juju"
	jujutesting "github.com/juju/juju/juju/testing"
	"github.com/juju/juju/mongo"
	"github.com/juju/juju/network"
	"github.com/juju/juju/provider/dummy"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/api"
	apideployer "github.com/juju/juju/state/api/deployer"
	"github.com/juju/juju/state/api/params"
	apirsyslog "github.com/juju/juju/state/api/rsyslog"
	charmtesting "github.com/juju/juju/state/apiserver/charmrevisionupdater/testing"
	"github.com/juju/juju/state/watcher"
	coretesting "github.com/juju/juju/testing"
	"github.com/juju/juju/tools"
	"github.com/juju/juju/upstart"
	"github.com/juju/juju/utils/ssh"
	sshtesting "github.com/juju/juju/utils/ssh/testing"
	"github.com/juju/juju/version"
	"github.com/juju/juju/worker"
	"github.com/juju/juju/worker/authenticationworker"
	"github.com/juju/juju/worker/deployer"
	"github.com/juju/juju/worker/instancepoller"
	"github.com/juju/juju/worker/machineenvironmentworker"
	"github.com/juju/juju/worker/rsyslog"
	"github.com/juju/juju/worker/singular"
	"github.com/juju/juju/worker/upgrader"
)

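// commonMachineSuite is the shared fixture for the machine agent tests.
// It combines agentSuite with the LXC testing suite, records which workers
// are started through the singular runner, and stubs out mongo provisioning.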
type commonMachineSuite struct {
	agentSuite
	singularRecord *singularRunnerRecord
	lxctesting.TestSuite
	fakeEnsureMongo fakeEnsure
}

func (s *commonMachineSuite) SetUpSuite(c *gc.C) {
	s.agentSuite.SetUpSuite(c)
	s.TestSuite.SetUpSuite(c)
	s.agentSuite.PatchValue(&charm.CacheDir, c.MkDir())
}

func (s *commonMachineSuite) TearDownSuite(c *gc.C) {
	s.TestSuite.TearDownSuite(c)
	s.agentSuite.TearDownSuite(c)
}

func (s *commonMachineSuite) SetUpTest(c *gc.C) {
	s.agentSuite.SetUpTest(c)
	s.TestSuite.SetUpTest(c)

	os.Remove(jujuRun) // ignore error; may not exist
	// Patch ssh user to avoid touching ~ubuntu/.ssh/authorized_keys.
	s.agentSuite.PatchValue(&authenticationworker.SSHUser, "")

	testpath := c.MkDir()
	s.agentSuite.PatchEnvPathPrepend(testpath)
	// mock out the start method so we can fake install services without sudo
	fakeCmd(filepath.Join(testpath, "start"))
	fakeCmd(filepath.Join(testpath, "stop"))

	s.agentSuite.PatchValue(&upstart.InitDir, c.MkDir())

	s.singularRecord = &singularRunnerRecord{}
	s.agentSuite.PatchValue(&newSingularRunner, s.singularRecord.newSingularRunner)
	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
		return newDummyWorker(), nil
	})

	s.fakeEnsureMongo = fakeEnsure{}
	s.agentSuite.PatchValue(&ensureMongoServer, s.fakeEnsureMongo.fakeEnsureMongo)
	s.agentSuite.PatchValue(&maybeInitiateMongoServer, s.fakeEnsureMongo.fakeInitiateMongo)
}

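// fakeCmd writes an executable script to path that does nothing and exits
// successfully, letting tests stand in for system commands (such as the
// upstart start/stop tools) without needing sudo.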
func fakeCmd(path string) {
	err := ioutil.WriteFile(path, []byte("#!/bin/bash --norc\nexit 0"), 0755)
	if err != nil {
		panic(err)
	}
}

func (s *commonMachineSuite) TearDownTest(c *gc.C) {
	s.TestSuite.TearDownTest(c)
	s.agentSuite.TearDownTest(c)
}

// primeAgent adds a new Machine to run the given jobs, and sets up the
// machine agent's directory.  It returns the new machine, the
// agent's configuration and the tools currently running.
func (s *commonMachineSuite) primeAgent(
	c *gc.C, vers version.Binary,
	jobs ...state.MachineJob) (m *state.Machine, config agent.ConfigSetterWriter, tools *tools.Tools) {

	// Add a machine and ensure it is provisioned.
	m, err := s.State.AddMachine("quantal", jobs...)
	c.Assert(err, gc.IsNil)
	inst, md := jujutesting.AssertStartInstance(c, s.Conn.Environ, m.Id())
	c.Assert(m.SetProvisioned(inst.Id(), state.BootstrapNonce, md), gc.IsNil)

	// Add an address for the tests in case the maybeInitiateMongoServer
	// codepath is exercised.
	s.setFakeMachineAddresses(c, m)

	// Set up the new machine.
	err = m.SetAgentVersion(vers)
	c.Assert(err, gc.IsNil)
	err = m.SetPassword(initialMachinePassword)
	c.Assert(err, gc.IsNil)
	tag := names.NewMachineTag(m.Id()).String()
	if m.IsManager() {
		err = m.SetMongoPassword(initialMachinePassword)
		c.Assert(err, gc.IsNil)
		config, tools = s.agentSuite.primeStateAgent(c, tag, initialMachinePassword, vers)
		info, ok := config.StateServingInfo()
		c.Assert(ok, jc.IsTrue)
		err = s.State.SetStateServingInfo(info)
		c.Assert(err, gc.IsNil)
	} else {
		config, tools = s.agentSuite.primeAgent(c, tag, initialMachinePassword, vers)
	}
	err = config.Write()
	c.Assert(err, gc.IsNil)
	return m, config, tools
}

// newAgent returns a new MachineAgent instance for the given machine.
func (s *commonMachineSuite) newAgent(c *gc.C, m *state.Machine) *MachineAgent {
	a := &MachineAgent{}
	s.initAgent(c, a, "--machine-id", m.Id())
	err := a.ReadConfig(m.Tag())
	c.Assert(err, gc.IsNil)
	return a
}

func (s *MachineSuite) TestParseSuccess(c *gc.C) {
	create := func() (cmd.Command, *AgentConf) {
		a := &MachineAgent{}
		return a, &a.AgentConf
	}
	a := CheckAgentCommand(c, create, []string{"--machine-id", "42"})
	c.Assert(a.(*MachineAgent).MachineId, gc.Equals, "42")
}

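// MachineSuite exercises the machine agent command: argument parsing,
// starting and stopping the agent, and the workers it runs for each job.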
type MachineSuite struct {
	commonMachineSuite
}

var _ = gc.Suite(&MachineSuite{})

const initialMachinePassword = "machine-password-1234567890"

func (s *MachineSuite) TestParseNonsense(c *gc.C) {
	for _, args := range [][]string{
		{},
		{"--machine-id", "-4004"},
	} {
		err := ParseAgentCommand(&MachineAgent{}, args)
		c.Assert(err, gc.ErrorMatches, "--machine-id option must be set, and expects a non-negative integer")
	}
}

func (s *MachineSuite) TestParseUnknown(c *gc.C) {
	a := &MachineAgent{}
	err := ParseAgentCommand(a, []string{"--machine-id", "42", "blistering barnacles"})
	c.Assert(err, gc.ErrorMatches, `unrecognized args: \["blistering barnacles"\]`)
}

func (s *MachineSuite) TestRunInvalidMachineId(c *gc.C) {
	c.Skip("agents don't yet distinguish between temporary and permanent errors")
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	err := s.newAgent(c, m).Run(nil)
	c.Assert(err, gc.ErrorMatches, "some error")
}

func (s *MachineSuite) TestRunStop(c *gc.C) {
	m, ac, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	err := a.Stop()
	c.Assert(err, gc.IsNil)
	c.Assert(<-done, gc.IsNil)
	c.Assert(charm.CacheDir, gc.Equals, filepath.Join(ac.DataDir(), "charmcache"))
}

func (s *MachineSuite) TestWithDeadMachine(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, gc.IsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, gc.IsNil)
}

func (s *MachineSuite) TestWithRemovedMachine(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, gc.IsNil)
	err = m.Remove()
	c.Assert(err, gc.IsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, gc.IsNil)
}

func (s *MachineSuite) TestDyingMachine(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	defer func() {
		c.Check(a.Stop(), gc.IsNil)
	}()
	err := m.Destroy()
	c.Assert(err, gc.IsNil)
	select {
	case err := <-done:
		c.Assert(err, gc.IsNil)
	case <-time.After(watcher.Period * 5 / 4):
		// TODO(rog) Fix this so it doesn't wait for so long.
		// https://bugs.github.com/juju/juju/+bug/1163983
		c.Fatalf("timed out waiting for agent to terminate")
	}
	err = m.Refresh()
	c.Assert(err, gc.IsNil)
	c.Assert(m.Life(), gc.Equals, state.Dead)
}

func (s *MachineSuite) TestHostUnits(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a := s.newAgent(c, m)
	ctx, reset := patchDeployContext(c, s.BackingState)
	defer reset()
	go func() { c.Check(a.Run(nil), gc.IsNil) }()
	defer func() { c.Check(a.Stop(), gc.IsNil) }()

	// check that unassigned units don't trigger any deployments.
	svc := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
	u0, err := svc.AddUnit()
	c.Assert(err, gc.IsNil)
	u1, err := svc.AddUnit()
	c.Assert(err, gc.IsNil)

	ctx.waitDeployed(c)

	// assign u0, check it's deployed.
	err = u0.AssignToMachine(m)
	c.Assert(err, gc.IsNil)
	ctx.waitDeployed(c, u0.Name())

	// "start the agent" for u0 to prevent short-circuited remove-on-destroy;
	// check that it's kept deployed despite being Dying.
	err = u0.SetStatus(params.StatusStarted, "", nil)
	c.Assert(err, gc.IsNil)
	err = u0.Destroy()
	c.Assert(err, gc.IsNil)
	ctx.waitDeployed(c, u0.Name())

	// add u1 to the machine, check it's deployed.
	err = u1.AssignToMachine(m)
	c.Assert(err, gc.IsNil)
	ctx.waitDeployed(c, u0.Name(), u1.Name())

	// make u0 dead; check the deployer recalls the unit and removes it from
	// state.
	err = u0.EnsureDead()
	c.Assert(err, gc.IsNil)
	ctx.waitDeployed(c, u1.Name())

	// The deployer actually removes the unit just after
	// removing its deployment, so we need to poll here
	// until it actually happens.
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		err := u0.Refresh()
		if err == nil && attempt.HasNext() {
			continue
		}
		c.Assert(err, jc.Satisfies, errors.IsNotFound)
	}

	// short-circuit-remove u1 after it's been deployed; check it's recalled
	// and removed from state.
	err = u1.Destroy()
	c.Assert(err, gc.IsNil)
	err = u1.Refresh()
	c.Assert(err, jc.Satisfies, errors.IsNotFound)
	ctx.waitDeployed(c)
}

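// patchDeployContext replaces newDeployContext with a stub that hands back a
// fakeContext bound to the given state, and returns that context together
// with a function that restores the original value.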
func patchDeployContext(c *gc.C, st *state.State) (*fakeContext, func()) {
	ctx := &fakeContext{
		inited: make(chan struct{}),
	}
	orig := newDeployContext
	newDeployContext = func(dst *apideployer.State, agentConfig agent.Config) deployer.Context {
		ctx.st = st
		ctx.agentConfig = agentConfig
		close(ctx.inited)
		return ctx
	}
	return ctx, func() { newDeployContext = orig }
}

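// setFakeMachineAddresses gives the machine a fixed address both in state and
// on the dummy environ instance, so a running instance poller will not
// overwrite it.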
func (s *commonMachineSuite) setFakeMachineAddresses(c *gc.C, machine *state.Machine) {
	addrs := []network.Address{
		network.NewAddress("0.1.2.3", network.ScopeUnknown),
	}
	err := machine.SetAddresses(addrs...)
	c.Assert(err, gc.IsNil)
	// Set the addresses in the environ instance as well so that if the instance poller
	// runs it won't overwrite them.
	instId, err := machine.InstanceId()
	c.Assert(err, gc.IsNil)
	insts, err := s.Conn.Environ.Instances([]instance.Id{instId})
	c.Assert(err, gc.IsNil)
	dummy.SetInstanceAddresses(insts[0], addrs)
}

func (s *MachineSuite) TestManageEnviron(c *gc.C) {
	usefulVersion := version.Current
	usefulVersion.Series = "quantal" // to match the charm created below
	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	op := make(chan dummy.Operation, 200)
	dummy.Listen(op)

	a := s.newAgent(c, m)
	// Make sure the agent is stopped even if the test fails.
	defer a.Stop()
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()

	// Check that the provisioner and firewaller are alive by doing
	// a rudimentary check that they respond to state changes.

	// Add one unit to a service; it should get allocated a machine
	// and then its ports should be opened.
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	err := svc.SetExposed()
	c.Assert(err, gc.IsNil)
	units, err := juju.AddUnits(s.State, svc, 1, "")
	c.Assert(err, gc.IsNil)
	c.Check(opRecvTimeout(c, s.State, op, dummy.OpStartInstance{}), gc.NotNil)

	// Wait for the instance id to show up in the state.
	s.waitProvisioned(c, units[0])
	err = units[0].OpenPort("tcp", 999)
	c.Assert(err, gc.IsNil)

	c.Check(opRecvTimeout(c, s.State, op, dummy.OpOpenPorts{}), gc.NotNil)

	err = a.Stop()
	c.Assert(err, gc.IsNil)

	select {
	case err := <-done:
		c.Assert(err, gc.IsNil)
	case <-time.After(5 * time.Second):
		c.Fatalf("timed out waiting for agent to terminate")
	}

	c.Assert(s.singularRecord.started(), jc.DeepEquals, []string{
		"charm-revision-updater",
		"cleaner",
		"environ-provisioner",
		"firewaller",
		"minunitsworker",
		"resumer",
	})
}

func (s *MachineSuite) TestManageEnvironRunsInstancePoller(c *gc.C) {
	s.agentSuite.PatchValue(&instancepoller.ShortPoll, 500*time.Millisecond)
	usefulVersion := version.Current
	usefulVersion.Series = "quantal" // to match the charm created below
	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), gc.IsNil)
	}()

	// Add one unit to a service.
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	units, err := juju.AddUnits(s.State, svc, 1, "")
	c.Assert(err, gc.IsNil)

	m, instId := s.waitProvisioned(c, units[0])
	insts, err := s.Conn.Environ.Instances([]instance.Id{instId})
	c.Assert(err, gc.IsNil)
	addrs := []network.Address{network.NewAddress("1.2.3.4", network.ScopeUnknown)}
	dummy.SetInstanceAddresses(insts[0], addrs)
	dummy.SetInstanceStatus(insts[0], "running")

	for a := coretesting.LongAttempt.Start(); a.Next(); {
		if !a.HasNext() {
			c.Logf("final machine addresses: %#v", m.Addresses())
			c.Fatalf("timed out waiting for machine to get address")
		}
		err := m.Refresh()
		c.Assert(err, gc.IsNil)
		instStatus, err := m.InstanceStatus()
		c.Assert(err, gc.IsNil)
		if reflect.DeepEqual(m.Addresses(), addrs) && instStatus == "running" {
			break
		}
	}
}

func (s *MachineSuite) TestManageEnvironRunsPeergrouper(c *gc.C) {
	started := make(chan struct{}, 1)
	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
		c.Check(st, gc.NotNil)
		select {
		case started <- struct{}{}:
		default:
		}
		return newDummyWorker(), nil
	})
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), gc.IsNil)
	}()
	select {
	case <-started:
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for peergrouper worker to be started")
	}
}

func (s *MachineSuite) TestEnsureLocalEnvironDoesntRunPeergrouper(c *gc.C) {
	started := make(chan struct{}, 1)
	s.agentSuite.PatchValue(&peergrouperNew, func(st *state.State) (worker.Worker, error) {
		c.Check(st, gc.NotNil)
		select {
		case started <- struct{}{}:
		default:
		}
		return newDummyWorker(), nil
	})
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	a := s.newAgent(c, m)
	err := a.ChangeConfig(func(config agent.ConfigSetter) {
		config.SetValue(agent.ProviderType, "local")
	})
	c.Assert(err, gc.IsNil)
	defer func() { c.Check(a.Stop(), gc.IsNil) }()
	go func() {
		c.Check(a.Run(nil), gc.IsNil)
	}()
	select {
	case <-started:
		c.Fatalf("local environment should not start peergrouper")
	case <-time.After(coretesting.ShortWait):
	}
}

	// An agent running JobManageEnviron should call utils.UseMultipleCPUs when it starts.
	usefulVersion := version.Current
	usefulVersion.Series = "quantal"
	envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), usefulVersion)
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	calledChan := make(chan struct{}, 1)
	s.agentSuite.PatchValue(&useMultipleCPUs, func() { calledChan <- struct{}{} })
	// Now, start the agent, and observe that a JobManageEnviron agent
	// calls UseMultipleCPUs
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), gc.IsNil)
	}()
	// Wait for configuration to be finished
	<-a.WorkersStarted()
	select {
	case <-calledChan:
	case <-time.After(coretesting.LongWait):
		c.Errorf("we failed to call UseMultipleCPUs()")
	}
	c.Check(a.Stop(), gc.IsNil)
	// However, an agent that runs only JobHostUnits doesn't call UseMultipleCPUs
	m2, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a2 := s.newAgent(c, m2)
	defer a2.Stop()
	go func() {
		c.Check(a2.Run(nil), gc.IsNil)
	}()
	// Wait until all the workers have been started, and then kill everything
	<-a2.workersStarted
	c.Check(a2.Stop(), gc.IsNil)
	select {
	case <-calledChan:
		c.Errorf("we should not have called UseMultipleCPUs()")
	case <-time.After(coretesting.ShortWait):
	}
}

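// waitProvisioned waits until the machine the unit is assigned to has an
// instance id, and returns that machine and instance id.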
func (s *MachineSuite) waitProvisioned(c *gc.C, unit *state.Unit) (*state.Machine, instance.Id) {
	c.Logf("waiting for unit %q to be provisioned", unit)
	machineId, err := unit.AssignedMachineId()
	c.Assert(err, gc.IsNil)
	m, err := s.State.Machine(machineId)
	c.Assert(err, gc.IsNil)
	w := m.Watch()
	defer w.Stop()
	timeout := time.After(coretesting.LongWait)
	for {
		select {
		case <-timeout:
			c.Fatalf("timed out waiting for provisioning")
		case _, ok := <-w.Changes():
			c.Assert(ok, jc.IsTrue)
			err := m.Refresh()
			c.Assert(err, gc.IsNil)
			if instId, err := m.InstanceId(); err == nil {
				c.Logf("unit provisioned with instance %s", instId)
				return m, instId
			} else {
				c.Check(err, jc.Satisfies, state.IsNotProvisionedError)
			}
		}
	}
	panic("watcher died")
}

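// testUpgradeRequest uploads tools for a later version, bumps the
// environment's agent version, and checks that running the agent fails with
// an UpgradeReadyError pointing at the new tools.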
func (s *MachineSuite) testUpgradeRequest(c *gc.C, agent runner, tag string, currentTools *tools.Tools) {
	newVers := version.Current
	newVers.Patch++
	newTools := envtesting.AssertUploadFakeToolsVersions(c, s.Conn.Environ.Storage(), newVers)[0]
	err := s.State.SetEnvironAgentVersion(newVers.Number)
	c.Assert(err, gc.IsNil)
	err = runWithTimeout(agent)
	envtesting.CheckUpgraderReadyError(c, err, &upgrader.UpgradeReadyError{
		AgentName: tag,
		OldTools:  currentTools.Version,
		NewTools:  newTools.Version,
		DataDir:   s.DataDir(),
	})
}

func (s *MachineSuite) TestUpgradeRequest(c *gc.C) {
	m, _, currentTools := s.primeAgent(c, version.Current, state.JobManageEnviron, state.JobHostUnits)
	a := s.newAgent(c, m)
	s.testUpgradeRequest(c, a, m.Tag(), currentTools)
}

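// fastDialOpts are API dial options with short timeouts, suitable for tests.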
var fastDialOpts = api.DialOpts{
	Timeout:    coretesting.LongWait,
	RetryDelay: coretesting.ShortWait,
}

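// waitStopped stops the agent and waits for its Run goroutine to send a
// result on done, tolerating shutdown errors from agents that manage the
// environment.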
func (s *MachineSuite) waitStopped(c *gc.C, job state.MachineJob, a *MachineAgent, done chan error) {
	err := a.Stop()
	if job == state.JobManageEnviron {
		// When shutting down, the API server can be shut down before
		// the other workers that connect to it, so they get an error
		// and then die, causing Stop to return an error. It's not
		// easy to control the actual error that's received in this
		// circumstance, so we just log it rather than asserting that
		// it is not nil.
		if err != nil {
			c.Logf("error shutting down state manager: %v", err)
		}
	} else {
		c.Assert(err, gc.IsNil)
	}

	select {
	case err := <-done:
		c.Assert(err, gc.IsNil)
	case <-time.After(5 * time.Second):
		c.Fatalf("timed out waiting for agent to terminate")
	}
}

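// assertJobWithAPI starts a machine agent with the given job and calls test
// with the agent's configuration and the API connection it opened.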
func (s *MachineSuite) assertJobWithAPI(
	c *gc.C,
	job state.MachineJob,
	test func(agent.Config, *api.State),
) {
	s.assertAgentOpensState(c, &reportOpenedAPI, job, func(cfg agent.Config, st eitherState) {
		test(cfg, st.(*api.State))
	})
}

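// assertJobWithState is like assertJobWithAPI but passes the opened
// *state.State to test; it fails immediately if the job does not need state.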
func (s *MachineSuite) assertJobWithState(
	c *gc.C,
	job state.MachineJob,
	test func(agent.Config, *state.State),
) {
	paramsJob := job.ToParams()
	if !paramsJob.NeedsState() {
		c.Fatalf("%v does not use state", paramsJob)
	}
	s.assertAgentOpensState(c, &reportOpenedState, job, func(cfg agent.Config, st eitherState) {
		test(cfg, st.(*state.State))
	})
}

// assertAgentOpensState asserts that a machine agent started with the
// given job will call the function pointed to by reportOpened. The
// agent's configuration and the value passed to reportOpened are then
// passed to the test function for further checking.
func (s *MachineSuite) assertAgentOpensState(
	c *gc.C,
	reportOpened *func(eitherState),
	job state.MachineJob,
	test func(agent.Config, eitherState),
) {
	stm, conf, _ := s.primeAgent(c, version.Current, job)
	a := s.newAgent(c, stm)
	defer a.Stop()

	// All state jobs currently also run an APIWorker, so no
	// need to check for that here, like in assertJobWithState.

	agentAPIs := make(chan eitherState, 1)
	s.agentSuite.PatchValue(reportOpened, func(st eitherState) {
		select {
		case agentAPIs <- st:
		default:
		}
	})

	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()

	select {
	case agentAPI := <-agentAPIs:
		c.Assert(agentAPI, gc.NotNil)
		test(conf, agentAPI)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("API not opened")
	}

	s.waitStopped(c, job, a, done)
}

func (s *MachineSuite) TestManageEnvironServesAPI(c *gc.C) {
	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
		st, err := api.Open(conf.APIInfo(), fastDialOpts)
		c.Assert(err, gc.IsNil)
		defer st.Close()
		m, err := st.Machiner().Machine(conf.Tag())
		c.Assert(err, gc.IsNil)
		c.Assert(m.Life(), gc.Equals, params.Alive)
	})
}

func (s *MachineSuite) TestManageEnvironRunsCleaner(c *gc.C) {
	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
		// Create a service and unit, and destroy the service.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		unit, err := service.AddUnit()
		c.Assert(err, gc.IsNil)
		err = service.Destroy()
		c.Assert(err, gc.IsNil)

		// Check the unit was not yet removed.
		err = unit.Refresh()
		c.Assert(err, gc.IsNil)
		w := unit.Watch()
		defer w.Stop()

		// Trigger a sync on the state used by the agent, and wait
		// for the unit to be removed.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for done := false; !done; {
			select {
			case <-timeout:
				c.Fatalf("unit not cleaned up")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				err := unit.Refresh()
				if errors.IsNotFound(err) {
					done = true
				} else {
					c.Assert(err, gc.IsNil)
				}
			}
		}
	})
}

func (s *MachineSuite) TestJobManageEnvironRunsMinUnitsWorker(c *gc.C) {
	s.assertJobWithState(c, state.JobManageEnviron, func(conf agent.Config, agentState *state.State) {
		// Ensure that the MinUnits worker is alive by doing a simple check
		// that it responds to state changes: add a service, set its minimum
		// number of units to one, wait for the worker to add the missing unit.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		err := service.SetMinUnits(1)
		c.Assert(err, gc.IsNil)
		w := service.Watch()
		defer w.Stop()

		// Trigger a sync on the state used by the agent, and wait for the unit
		// to be created.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for {
			select {
			case <-timeout:
				c.Fatalf("unit not created")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				units, err := service.AllUnits()
				c.Assert(err, gc.IsNil)
				if len(units) == 1 {
					return
				}
			}
		}
	})
}

func (s *MachineSuite) TestMachineAgentRunsAuthorisedKeysWorker(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), gc.IsNil) }()
	defer func() { c.Check(a.Stop(), gc.IsNil) }()

	// Update the keys in the environment.
	sshKey := sshtesting.ValidKeyOne.Key + " user@host"
	err := s.BackingState.UpdateEnvironConfig(map[string]interface{}{"authorized-keys": sshKey}, nil, nil)
	c.Assert(err, gc.IsNil)

	// Wait for ssh keys file to be updated.
	s.State.StartSync()
	timeout := time.After(coretesting.LongWait)
	sshKeyWithCommentPrefix := sshtesting.ValidKeyOne.Key + " Juju:user@host"
	for {
		select {
		case <-timeout:
			c.Fatalf("timeout while waiting for authorised ssh keys to change")
		case <-time.After(coretesting.ShortWait):
			keys, err := ssh.ListKeys(authenticationworker.SSHUser, ssh.FullKeys)
			c.Assert(err, gc.IsNil)
			keysStr := strings.Join(keys, "\n")
			if sshKeyWithCommentPrefix != keysStr {
				continue
			}
			return
		}
	}
}

// opRecvTimeout waits for any of the given kinds of operation to
// be received from opc, and times out if none arrives.
func opRecvTimeout(c *gc.C, st *state.State, opc <-chan dummy.Operation, kinds ...dummy.Operation) dummy.Operation {
	st.StartSync()
	for {
		select {
		case op := <-opc:
			for _, k := range kinds {
				if reflect.TypeOf(op) == reflect.TypeOf(k) {
					return op
				}
			}
			c.Logf("discarding unknown event %#v", op)
		case <-time.After(15 * time.Second):
			c.Fatalf("timed out waiting for operation")
		}
	}
}

func (s *MachineSuite) TestOpenStateFailsForJobHostUnitsButOpenAPIWorks(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	s.testOpenAPIState(c, m, s.newAgent(c, m), initialMachinePassword)
	s.assertJobWithAPI(c, state.JobHostUnits, func(conf agent.Config, st *api.State) {
		s.assertCannotOpenState(c, conf.Tag(), conf.DataDir())
	})
}

func (s *MachineSuite) TestOpenStateWorksForJobManageEnviron(c *gc.C) {
	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
		s.assertCanOpenState(c, conf.Tag(), conf.DataDir())
	})
}

func (s *MachineSuite) TestMachineAgentSymlinkJujuRun(c *gc.C) {
	_, err := os.Stat(jujuRun)
	c.Assert(err, jc.Satisfies, os.IsNotExist)
	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
		// juju-run should have been created
		_, err := os.Stat(jujuRun)
		c.Assert(err, gc.IsNil)
	})
}

func (s *MachineSuite) TestMachineAgentSymlinkJujuRunExists(c *gc.C) {
	err := os.Symlink("/nowhere/special", jujuRun)
	c.Assert(err, gc.IsNil)
	_, err = os.Stat(jujuRun)
	c.Assert(err, jc.Satisfies, os.IsNotExist)
	s.assertJobWithAPI(c, state.JobManageEnviron, func(conf agent.Config, st *api.State) {
		// juju-run should have been recreated
		_, err := os.Stat(jujuRun)
		c.Assert(err, gc.IsNil)
		link, err := os.Readlink(jujuRun)
		c.Assert(err, gc.IsNil)
		c.Assert(link, gc.Not(gc.Equals), "/nowhere/special")
	})
}

func (s *MachineSuite) TestMachineEnvironWorker(c *gc.C) {
	proxyDir := c.MkDir()
	s.agentSuite.PatchValue(&machineenvironmentworker.ProxyDirectory, proxyDir)
	s.agentSuite.PatchValue(&apt.ConfFile, filepath.Join(proxyDir, "juju-apt-proxy"))

	s.primeAgent(c, version.Current, state.JobHostUnits)
	// Make sure there are some proxy settings to write.
	proxySettings := proxy.Settings{
		Http:  "http proxy",
		Https: "https proxy",
		Ftp:   "ftp proxy",
	}

	updateAttrs := config.ProxyConfigMap(proxySettings)

	err := s.State.UpdateEnvironConfig(updateAttrs, nil, nil)
	c.Assert(err, gc.IsNil)

	s.assertJobWithAPI(c, state.JobHostUnits, func(conf agent.Config, st *api.State) {
		for {
			select {
			case <-time.After(coretesting.LongWait):
				c.Fatalf("timeout while waiting for proxy settings to change")
			case <-time.After(10 * time.Millisecond):
				_, err := os.Stat(apt.ConfFile)
				if os.IsNotExist(err) {
					continue
				}
				c.Assert(err, gc.IsNil)
				return
			}
		}
	})
}

func (s *MachineSuite) TestMachineAgentUninstall(c *gc.C) {
	m, ac, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	err := m.EnsureDead()
	c.Assert(err, gc.IsNil)
	a := s.newAgent(c, m)
	err = runWithTimeout(a)
	c.Assert(err, gc.IsNil)
	// juju-run should have been removed on termination
	_, err = os.Stat(jujuRun)
	c.Assert(err, jc.Satisfies, os.IsNotExist)
	// data-dir should have been removed on termination
	_, err = os.Stat(ac.DataDir())
	c.Assert(err, jc.Satisfies, os.IsNotExist)
}

func (s *MachineSuite) TestMachineAgentRsyslogManageEnviron(c *gc.C) {
	s.testMachineAgentRsyslogConfigWorker(c, state.JobManageEnviron, rsyslog.RsyslogModeAccumulate)
}

func (s *MachineSuite) TestMachineAgentRsyslogHostUnits(c *gc.C) {
	s.testMachineAgentRsyslogConfigWorker(c, state.JobHostUnits, rsyslog.RsyslogModeForwarding)
}

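// testMachineAgentRsyslogConfigWorker checks that an agent running the given
// job starts the rsyslog config worker in the expected mode.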
func (s *MachineSuite) testMachineAgentRsyslogConfigWorker(c *gc.C, job state.MachineJob, expectedMode rsyslog.RsyslogMode) {
	created := make(chan rsyslog.RsyslogMode, 1)
	s.agentSuite.PatchValue(&newRsyslogConfigWorker, func(_ *apirsyslog.State, _ agent.Config, mode rsyslog.RsyslogMode) (worker.Worker, error) {
		created <- mode
		return newDummyWorker(), nil
	})
	s.assertJobWithAPI(c, job, func(conf agent.Config, st *api.State) {
		select {
		case <-time.After(coretesting.LongWait):
			c.Fatalf("timeout while waiting for rsyslog worker to be created")
		case mode := <-created:
			c.Assert(mode, gc.Equals, expectedMode)
		}
	})
}

func (s *MachineSuite) TestMachineAgentRunsAPIAddressUpdaterWorker(c *gc.C) {
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, version.Current, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), gc.IsNil) }()
	defer func() { c.Check(a.Stop(), gc.IsNil) }()

	// Update the API addresses.
	updatedServers := [][]network.HostPort{network.AddressesWithPort(
		network.NewAddresses("localhost"), 1234,
	)}
	err := s.BackingState.SetAPIHostPorts(updatedServers)
	c.Assert(err, gc.IsNil)

	// Wait for config to be updated.
	s.BackingState.StartSync()
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		addrs, err := a.CurrentConfig().APIAddresses()
		c.Assert(err, gc.IsNil)
		if reflect.DeepEqual(addrs, []string{"localhost:1234"}) {
			return
		}
	}
	c.Fatalf("timeout while waiting for agent config to change")
}

func (s *MachineSuite) TestMachineAgentUpgradeMongo(c *gc.C) {
	m, agentConfig, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)
	agentConfig.SetUpgradedToVersion(version.MustParse("1.18.0"))
	err := agentConfig.Write()
	c.Assert(err, gc.IsNil)
	err = s.State.MongoSession().DB("admin").RemoveUser(m.Tag())
	c.Assert(err, gc.IsNil)

	s.agentSuite.PatchValue(&ensureMongoAdminUser, func(p mongo.EnsureAdminUserParams) (bool, error) {
		err := s.State.MongoSession().DB("admin").AddUser(p.User, p.Password, false)
		c.Assert(err, gc.IsNil)
		return true, nil
	})

	stateOpened := make(chan eitherState, 1)
	s.agentSuite.PatchValue(&reportOpenedState, func(st eitherState) {
		select {
		case stateOpened <- st:
		default:
		}
	})

	// Start the machine agent, and wait for state to be opened.
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() { done <- a.Run(nil) }()
	defer a.Stop() // in case of failure
	select {
	case st := <-stateOpened:
		c.Assert(st, gc.NotNil)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("state not opened")
	}
	s.waitStopped(c, state.JobManageEnviron, a, done)
	c.Assert(s.fakeEnsureMongo.ensureCount, gc.Equals, 1)
	c.Assert(s.fakeEnsureMongo.initiateCount, gc.Equals, 1)
}

// MachineWithCharmsSuite provides infrastructure for tests which need to
// work with charms.
type MachineWithCharmsSuite struct {
	commonMachineSuite
	charmtesting.CharmSuite

	machine *state.Machine
}

var _ = gc.Suite(&MachineWithCharmsSuite{})

func (s *MachineWithCharmsSuite) SetUpSuite(c *gc.C) {
	s.commonMachineSuite.SetUpSuite(c)
	s.CharmSuite.SetUpSuite(c, &s.commonMachineSuite.JujuConnSuite)
}

func (s *MachineWithCharmsSuite) TearDownSuite(c *gc.C) {
	s.commonMachineSuite.TearDownSuite(c)
	s.CharmSuite.TearDownSuite(c)
}

func (s *MachineWithCharmsSuite) SetUpTest(c *gc.C) {
	s.commonMachineSuite.SetUpTest(c)
	s.CharmSuite.SetUpTest(c)
}

func (s *MachineWithCharmsSuite) TearDownTest(c *gc.C) {
	s.commonMachineSuite.TearDownTest(c)
	s.CharmSuite.TearDownTest(c)
}

func (s *MachineWithCharmsSuite) TestManageEnvironRunsCharmRevisionUpdater(c *gc.C) {
	m, _, _ := s.primeAgent(c, version.Current, state.JobManageEnviron)

	s.SetupScenario(c)

	a := s.newAgent(c, m)
	go func() {
		c.Check(a.Run(nil), gc.IsNil)
	}()
	defer func() { c.Check(a.Stop(), gc.IsNil) }()

	checkRevision := func() bool {
		curl := charm.MustParseURL("cs:quantal/mysql")
		placeholder, err := s.State.LatestPlaceholderCharm(curl)
		return err == nil && placeholder.String() == curl.WithRevision(23).String()
	}
	success := false
	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		if success = checkRevision(); success {
			break
		}
	}
	c.Assert(success, gc.Equals, true)
}

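// singularRunnerRecord remembers the names of all workers started through a
// singular runner, so tests can assert which singular workers ran.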
type singularRunnerRecord struct {
	mu             sync.Mutex
	startedWorkers set.Strings
}

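// newSingularRunner wraps singular.New so that workers started through the
// returned runner are recorded.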
func (r *singularRunnerRecord) newSingularRunner(runner worker.Runner, conn singular.Conn) (worker.Runner, error) {
	sr, err := singular.New(runner, conn)
	if err != nil {
		return nil, err
	}
	return &fakeSingularRunner{
		Runner: sr,
		record: r,
	}, nil
}

// started returns the names of all singular-started workers.
func (r *singularRunnerRecord) started() []string {
	return r.startedWorkers.SortedValues()
}

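// fakeSingularRunner is a worker.Runner that records worker names in the
// associated singularRunnerRecord before starting them.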
type fakeSingularRunner struct {
	worker.Runner
	record *singularRunnerRecord
}

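// StartWorker records the worker's name and then delegates to the wrapped
// runner.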
func (r *fakeSingularRunner) StartWorker(name string, start func() (worker.Worker, error)) error {
	r.record.mu.Lock()
	defer r.record.mu.Unlock()
	r.record.startedWorkers.Add(name)
	return r.Runner.StartWorker(name, start)
}

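// newDummyWorker returns a worker that does nothing until it is stopped.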
func newDummyWorker() worker.Worker {
	return worker.NewSimpleWorker(func(stop <-chan struct{}) error {
		<-stop
		return nil
	})
}