github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/cmd/jujud/agent/machine_test.go (about)

     1  // Copyright 2012-2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package agent
     5  
     6  import (
     7  	"io/ioutil"
     8  	"os"
     9  	"path/filepath"
    10  	"reflect"
    11  	"runtime"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/juju/cmd"
    16  	"github.com/juju/cmd/cmdtesting"
    17  	"github.com/juju/errors"
    18  	jc "github.com/juju/testing/checkers"
    19  	"github.com/juju/utils"
    20  	"github.com/juju/utils/arch"
    21  	"github.com/juju/utils/series"
    22  	"github.com/juju/utils/set"
    23  	"github.com/juju/utils/ssh"
    24  	sshtesting "github.com/juju/utils/ssh/testing"
    25  	"github.com/juju/utils/symlink"
    26  	"github.com/juju/version"
    27  	gc "gopkg.in/check.v1"
    28  	"gopkg.in/juju/charmrepo.v2-unstable"
    29  	"gopkg.in/juju/names.v2"
    30  	"gopkg.in/natefinch/lumberjack.v2"
    31  	"gopkg.in/tomb.v1"
    32  
    33  	"github.com/juju/juju/agent"
    34  	"github.com/juju/juju/api"
    35  	"github.com/juju/juju/api/imagemetadata"
    36  	apimachiner "github.com/juju/juju/api/machiner"
    37  	"github.com/juju/juju/apiserver/params"
    38  	"github.com/juju/juju/cert"
    39  	"github.com/juju/juju/cmd/jujud/agent/model"
    40  	"github.com/juju/juju/core/migration"
    41  	"github.com/juju/juju/environs"
    42  	envtesting "github.com/juju/juju/environs/testing"
    43  	"github.com/juju/juju/instance"
    44  	"github.com/juju/juju/juju"
    45  	"github.com/juju/juju/network"
    46  	"github.com/juju/juju/provider/dummy"
    47  	"github.com/juju/juju/state"
    48  	"github.com/juju/juju/state/watcher"
    49  	"github.com/juju/juju/status"
    50  	"github.com/juju/juju/storage"
    51  	coretesting "github.com/juju/juju/testing"
    52  	"github.com/juju/juju/tools"
    53  	jujuversion "github.com/juju/juju/version"
    54  	"github.com/juju/juju/worker"
    55  	"github.com/juju/juju/worker/authenticationworker"
    56  	"github.com/juju/juju/worker/certupdater"
    57  	"github.com/juju/juju/worker/dependency"
    58  	"github.com/juju/juju/worker/diskmanager"
    59  	"github.com/juju/juju/worker/instancepoller"
    60  	"github.com/juju/juju/worker/machiner"
    61  	"github.com/juju/juju/worker/migrationmaster"
    62  	"github.com/juju/juju/worker/mongoupgrader"
    63  	"github.com/juju/juju/worker/storageprovisioner"
    64  	"github.com/juju/juju/worker/upgrader"
    65  	"github.com/juju/juju/worker/workertest"
    66  )
    67  
// MachineSuite holds functional tests that run a complete machine
// agent against the test state server set up by commonMachineSuite.
type MachineSuite struct {
	commonMachineSuite
}
    71  
    72  var _ = gc.Suite(&MachineSuite{})
    73  
// SetUpTest prepares the common machine-suite fixtures and arranges
// for the test logs to be dumped if the test appears to hang.
func (s *MachineSuite) SetUpTest(c *gc.C) {
	s.commonMachineSuite.SetUpTest(c)
	// Most of these tests normally finish sub-second on a fast machine.
	// If any given test hits a minute, we have almost certainly become
	// wedged, so dump the logs.
	coretesting.DumpTestLogsAfter(time.Minute, c, s)
}
    81  
    82  func (s *MachineSuite) TestParseNonsense(c *gc.C) {
    83  	for _, args := range [][]string{
    84  		{},
    85  		{"--machine-id", "-4004"},
    86  	} {
    87  		var agentConf agentConf
    88  		err := ParseAgentCommand(&machineAgentCmd{agentInitializer: &agentConf}, args)
    89  		c.Assert(err, gc.ErrorMatches, "--machine-id option must be set, and expects a non-negative integer")
    90  	}
    91  }
    92  
    93  func (s *MachineSuite) TestParseUnknown(c *gc.C) {
    94  	var agentConf agentConf
    95  	a := &machineAgentCmd{agentInitializer: &agentConf}
    96  	err := ParseAgentCommand(a, []string{"--machine-id", "42", "blistering barnacles"})
    97  	c.Assert(err, gc.ErrorMatches, `unrecognized args: \["blistering barnacles"\]`)
    98  }
    99  
   100  func (s *MachineSuite) TestParseSuccess(c *gc.C) {
   101  	create := func() (cmd.Command, AgentConf) {
   102  		agentConf := agentConf{dataDir: s.DataDir()}
   103  		a := NewMachineAgentCmd(
   104  			nil,
   105  			NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
   106  			&agentConf,
   107  			&agentConf,
   108  		)
   109  		a.(*machineAgentCmd).logToStdErr = true
   110  
   111  		return a, &agentConf
   112  	}
   113  	a := CheckAgentCommand(c, create, []string{"--machine-id", "42"})
   114  	c.Assert(a.(*machineAgentCmd).machineId, gc.Equals, "42")
   115  }
   116  
// TestRunInvalidMachineId would verify that running an agent against a
// machine it cannot represent fails; it is skipped until agents can
// distinguish temporary from permanent errors.
func (s *MachineSuite) TestRunInvalidMachineId(c *gc.C) {
	c.Skip("agents don't yet distinguish between temporary and permanent errors")
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	err := s.newAgent(c, m).Run(nil)
	c.Assert(err, gc.ErrorMatches, "some error")
}
   123  
   124  func (s *MachineSuite) TestUseLumberjack(c *gc.C) {
   125  	ctx := cmdtesting.Context(c)
   126  	agentConf := FakeAgentConfig{}
   127  
   128  	a := NewMachineAgentCmd(
   129  		ctx,
   130  		NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
   131  		agentConf,
   132  		agentConf,
   133  	)
   134  	// little hack to set the data that Init expects to already be set
   135  	a.(*machineAgentCmd).machineId = "42"
   136  
   137  	err := a.Init(nil)
   138  	c.Assert(err, gc.IsNil)
   139  
   140  	l, ok := ctx.Stderr.(*lumberjack.Logger)
   141  	c.Assert(ok, jc.IsTrue)
   142  	c.Check(l.MaxAge, gc.Equals, 0)
   143  	c.Check(l.MaxBackups, gc.Equals, 2)
   144  	c.Check(l.Filename, gc.Equals, filepath.FromSlash("/var/log/juju/machine-42.log"))
   145  	c.Check(l.MaxSize, gc.Equals, 300)
   146  }
   147  
   148  func (s *MachineSuite) TestDontUseLumberjack(c *gc.C) {
   149  	ctx := cmdtesting.Context(c)
   150  	agentConf := FakeAgentConfig{}
   151  
   152  	a := NewMachineAgentCmd(
   153  		ctx,
   154  		NewTestMachineAgentFactory(&agentConf, nil, c.MkDir()),
   155  		agentConf,
   156  		agentConf,
   157  	)
   158  	// little hack to set the data that Init expects to already be set
   159  	a.(*machineAgentCmd).machineId = "42"
   160  
   161  	// set the value that normally gets set by the flag parsing
   162  	a.(*machineAgentCmd).logToStdErr = true
   163  
   164  	err := a.Init(nil)
   165  	c.Assert(err, gc.IsNil)
   166  
   167  	_, ok := ctx.Stderr.(*lumberjack.Logger)
   168  	c.Assert(ok, jc.IsFalse)
   169  }
   170  
   171  func (s *MachineSuite) TestRunStop(c *gc.C) {
   172  	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
   173  	a := s.newAgent(c, m)
   174  	done := make(chan error)
   175  	go func() {
   176  		done <- a.Run(nil)
   177  	}()
   178  	err := a.Stop()
   179  	c.Assert(err, jc.ErrorIsNil)
   180  	c.Assert(<-done, jc.ErrorIsNil)
   181  	c.Assert(charmrepo.CacheDir, gc.Equals, filepath.Join(ac.DataDir(), "charmcache"))
   182  }
   183  
   184  func (s *MachineSuite) TestWithDeadMachine(c *gc.C) {
   185  	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
   186  	err := m.EnsureDead()
   187  	c.Assert(err, jc.ErrorIsNil)
   188  	a := s.newAgent(c, m)
   189  	err = runWithTimeout(a)
   190  	c.Assert(err, jc.ErrorIsNil)
   191  
   192  	_, err = os.Stat(ac.DataDir())
   193  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   194  }
   195  
   196  func (s *MachineSuite) TestWithRemovedMachine(c *gc.C) {
   197  	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
   198  	err := m.EnsureDead()
   199  	c.Assert(err, jc.ErrorIsNil)
   200  	err = m.Remove()
   201  	c.Assert(err, jc.ErrorIsNil)
   202  	a := s.newAgent(c, m)
   203  	err = runWithTimeout(a)
   204  	c.Assert(err, jc.ErrorIsNil)
   205  
   206  	_, err = os.Stat(ac.DataDir())
   207  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   208  }
   209  
// TestDyingMachine checks that when the agent's machine is destroyed,
// the agent notices, terminates cleanly, and leaves the machine Dead
// in state.
func (s *MachineSuite) TestDyingMachine(c *gc.C) {
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	defer func() {
		c.Check(a.Stop(), jc.ErrorIsNil)
	}()
	// Wait for configuration to be finished
	<-a.WorkersStarted()
	err := m.Destroy()
	c.Assert(err, jc.ErrorIsNil)
	select {
	case err := <-done:
		c.Assert(err, jc.ErrorIsNil)
	// Allow slightly more than one watcher poll period for the agent
	// to observe the lifecycle change.
	case <-time.After(watcher.Period * 5 / 4):
		// TODO(rog) Fix this so it doesn't wait for so long.
		// https://bugs.launchpad.net/juju-core/+bug/1163983
		c.Fatalf("timed out waiting for agent to terminate")
	}
	err = m.Refresh()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(m.Life(), gc.Equals, state.Dead)
}
   236  
   237  func (s *MachineSuite) TestHostUnits(c *gc.C) {
   238  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
   239  	a := s.newAgent(c, m)
   240  	ctx, reset := patchDeployContext(c, s.BackingState)
   241  	defer reset()
   242  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   243  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   244  
   245  	// check that unassigned units don't trigger any deployments.
   246  	svc := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
   247  	u0, err := svc.AddUnit()
   248  	c.Assert(err, jc.ErrorIsNil)
   249  	u1, err := svc.AddUnit()
   250  	c.Assert(err, jc.ErrorIsNil)
   251  
   252  	ctx.waitDeployed(c)
   253  
   254  	// assign u0, check it's deployed.
   255  	err = u0.AssignToMachine(m)
   256  	c.Assert(err, jc.ErrorIsNil)
   257  	ctx.waitDeployed(c, u0.Name())
   258  
   259  	// "start the agent" for u0 to prevent short-circuited remove-on-destroy;
   260  	// check that it's kept deployed despite being Dying.
   261  	// lp:1558657
   262  	now := time.Now()
   263  	sInfo := status.StatusInfo{
   264  		Status:  status.Idle,
   265  		Message: "",
   266  		Since:   &now,
   267  	}
   268  	err = u0.SetAgentStatus(sInfo)
   269  	c.Assert(err, jc.ErrorIsNil)
   270  	err = u0.Destroy()
   271  	c.Assert(err, jc.ErrorIsNil)
   272  	ctx.waitDeployed(c, u0.Name())
   273  
   274  	// add u1 to the machine, check it's deployed.
   275  	err = u1.AssignToMachine(m)
   276  	c.Assert(err, jc.ErrorIsNil)
   277  	ctx.waitDeployed(c, u0.Name(), u1.Name())
   278  
   279  	// make u0 dead; check the deployer recalls the unit and removes it from
   280  	// state.
   281  	err = u0.EnsureDead()
   282  	c.Assert(err, jc.ErrorIsNil)
   283  	ctx.waitDeployed(c, u1.Name())
   284  
   285  	// The deployer actually removes the unit just after
   286  	// removing its deployment, so we need to poll here
   287  	// until it actually happens.
   288  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   289  		if !attempt.HasNext() {
   290  			c.Fatalf("timeout waiting for unit %q to be removed", u0.Name())
   291  		}
   292  		if err := u0.Refresh(); err == nil {
   293  			c.Logf("waiting unit %q to be removed...", u0.Name())
   294  			continue
   295  		} else {
   296  			c.Assert(err, jc.Satisfies, errors.IsNotFound)
   297  			break
   298  		}
   299  	}
   300  
   301  	// short-circuit-remove u1 after it's been deployed; check it's recalled
   302  	// and removed from state.
   303  	err = u1.Destroy()
   304  	c.Assert(err, jc.ErrorIsNil)
   305  	err = u1.Refresh()
   306  	c.Assert(err, jc.Satisfies, errors.IsNotFound)
   307  	ctx.waitDeployed(c)
   308  }
   309  
// TestManageModel runs a full JobManageModel agent and makes a
// rudimentary liveness check of the provisioner and firewaller
// workers: adding a unit should start an instance, and opening a port
// should reach the (dummy) provider.
func (s *MachineSuite) TestManageModel(c *gc.C) {
	usefulVersion := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: "quantal", // to match the charm created below
	}
	envtesting.AssertUploadFakeToolsVersions(c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), usefulVersion)
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	// Listen to the dummy provider's operation stream so we can observe
	// StartInstance and OpenPorts side effects below.
	op := make(chan dummy.Operation, 200)
	dummy.Listen(op)

	a := s.newAgent(c, m)
	// Make sure the agent is stopped even if the test fails.
	defer a.Stop()
	done := make(chan error)
	go func() {
		done <- a.Run(nil)
	}()
	c.Logf("started test agent, waiting for workers...")
	r0 := s.singularRecord.nextRunner(c)
	r0.waitForWorker(c, "txnpruner")

	// Check that the provisioner and firewaller are alive by doing
	// a rudimentary check that it responds to state changes.

	// Create an exposed service, and add a unit.
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	err := svc.SetExposed()
	c.Assert(err, jc.ErrorIsNil)
	units, err := juju.AddUnits(s.State, svc, svc.Name(), 1, nil)
	c.Assert(err, jc.ErrorIsNil)

	// It should be allocated to a machine, which should then be provisioned.
	c.Logf("service %q added with 1 unit, waiting for unit %q's machine to be started...", svc.Name(), units[0].Name())
	c.Check(opRecvTimeout(c, s.State, op, dummy.OpStartInstance{}), gc.NotNil)
	c.Logf("machine hosting unit %q started, waiting for the unit to be deployed...", units[0].Name())
	s.waitProvisioned(c, units[0])

	// Open a port on the unit; it should be handled by the firewaller.
	c.Logf("unit %q deployed, opening port tcp/999...", units[0].Name())
	err = units[0].OpenPort("tcp", 999)
	c.Assert(err, jc.ErrorIsNil)
	c.Check(opRecvTimeout(c, s.State, op, dummy.OpOpenPorts{}), gc.NotNil)
	c.Logf("unit %q port tcp/999 opened, cleaning up...", units[0].Name())

	err = a.Stop()
	c.Assert(err, jc.ErrorIsNil)
	select {
	case err := <-done:
		c.Assert(err, jc.ErrorIsNil)
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for agent to terminate")
	}
	c.Logf("test agent stopped successfully.")
}
   366  
// TestManageModelRunsInstancePoller checks that the instance poller
// worker propagates provider-reported address and status changes onto
// the corresponding machine document in state.
func (s *MachineSuite) TestManageModelRunsInstancePoller(c *gc.C) {
	// Shorten the poll interval so the test converges quickly.
	s.AgentSuite.PatchValue(&instancepoller.ShortPoll, 500*time.Millisecond)
	usefulVersion := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: "quantal", // to match the charm created below
	}
	envtesting.AssertUploadFakeToolsVersions(
		c, s.DefaultToolsStorage,
		s.Environ.Config().AgentStream(),
		s.Environ.Config().AgentStream(),
		usefulVersion,
	)
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	defer a.Stop()
	go func() {
		c.Check(a.Run(nil), jc.ErrorIsNil)
	}()

	// Add one unit to a service;
	charm := s.AddTestingCharm(c, "dummy")
	svc := s.AddTestingService(c, "test-service", charm)
	units, err := juju.AddUnits(s.State, svc, svc.Name(), 1, nil)
	c.Assert(err, jc.ErrorIsNil)

	m, instId := s.waitProvisioned(c, units[0])
	insts, err := s.Environ.Instances([]instance.Id{instId})
	c.Assert(err, jc.ErrorIsNil)
	// Change addresses and status at the provider level; the poller
	// should copy both into state.
	addrs := network.NewAddresses("1.2.3.4")
	dummy.SetInstanceAddresses(insts[0], addrs)
	dummy.SetInstanceStatus(insts[0], "running")

	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
		if !attempt.HasNext() {
			c.Logf("final machine addresses: %#v", m.Addresses())
			c.Fatalf("timed out waiting for machine to get address")
		}
		err := m.Refresh()
		c.Assert(err, jc.ErrorIsNil)
		instStatus, err := m.InstanceStatus()
		c.Assert(err, jc.ErrorIsNil)
		c.Logf("found status is %q %q", instStatus.Status, instStatus.Message)
		if reflect.DeepEqual(m.Addresses(), addrs) && instStatus.Message == "running" {
			c.Logf("machine %q address updated: %+v", m.Id(), addrs)
			break
		}
		c.Logf("waiting for machine %q address to be updated", m.Id())
	}
}
   417  
   418  func (s *MachineSuite) TestManageModelRunsPeergrouper(c *gc.C) {
   419  	started := newSignal()
   420  	s.AgentSuite.PatchValue(&peergrouperNew, func(st *state.State, _ bool) (worker.Worker, error) {
   421  		c.Check(st, gc.NotNil)
   422  		started.trigger()
   423  		return newDummyWorker(), nil
   424  	})
   425  	m, _, _ := s.primeAgent(c, state.JobManageModel)
   426  	a := s.newAgent(c, m)
   427  	defer a.Stop()
   428  	go func() {
   429  		c.Check(a.Run(nil), jc.ErrorIsNil)
   430  	}()
   431  	started.assertTriggered(c, "peergrouperworker to start")
   432  }
   433  
   434  func (s *MachineSuite) TestManageModelRunsDbLogPrunerIfFeatureFlagEnabled(c *gc.C) {
   435  	m, _, _ := s.primeAgent(c, state.JobManageModel)
   436  	a := s.newAgent(c, m)
   437  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   438  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   439  
   440  	runner := s.singularRecord.nextRunner(c)
   441  	runner.waitForWorker(c, "dblogpruner")
   442  }
   443  
   444  func (s *MachineSuite) TestManageModelCallsUseMultipleCPUs(c *gc.C) {
   445  	// If it has been enabled, the JobManageModel agent should call utils.UseMultipleCPUs
   446  	usefulVersion := version.Binary{
   447  		Number: jujuversion.Current,
   448  		Arch:   arch.HostArch(),
   449  		Series: "quantal", // to match the charm created below
   450  	}
   451  	envtesting.AssertUploadFakeToolsVersions(
   452  		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), usefulVersion)
   453  	m, _, _ := s.primeAgent(c, state.JobManageModel)
   454  	calledChan := make(chan struct{}, 1)
   455  	s.AgentSuite.PatchValue(&useMultipleCPUs, func() { calledChan <- struct{}{} })
   456  	// Now, start the agent, and observe that a JobManageModel agent
   457  	// calls UseMultipleCPUs
   458  	a := s.newAgent(c, m)
   459  	defer a.Stop()
   460  	go func() {
   461  		c.Check(a.Run(nil), jc.ErrorIsNil)
   462  	}()
   463  	// Wait for configuration to be finished
   464  	<-a.WorkersStarted()
   465  	s.assertChannelActive(c, calledChan, "UseMultipleCPUs() to be called")
   466  
   467  	c.Check(a.Stop(), jc.ErrorIsNil)
   468  	// However, an agent that just JobHostUnits doesn't call UseMultipleCPUs
   469  	m2, _, _ := s.primeAgent(c, state.JobHostUnits)
   470  	a2 := s.newAgent(c, m2)
   471  	defer a2.Stop()
   472  	go func() {
   473  		c.Check(a2.Run(nil), jc.ErrorIsNil)
   474  	}()
   475  	// Wait until all the workers have been started, and then kill everything
   476  	<-a2.workersStarted
   477  	c.Check(a2.Stop(), jc.ErrorIsNil)
   478  	s.assertChannelInactive(c, calledChan, "UseMultipleCPUs() was called")
   479  }
   480  
   481  func (s *MachineSuite) waitProvisioned(c *gc.C, unit *state.Unit) (*state.Machine, instance.Id) {
   482  	c.Logf("waiting for unit %q to be provisioned", unit)
   483  	machineId, err := unit.AssignedMachineId()
   484  	c.Assert(err, jc.ErrorIsNil)
   485  	m, err := s.State.Machine(machineId)
   486  	c.Assert(err, jc.ErrorIsNil)
   487  	w := m.Watch()
   488  	defer worker.Stop(w)
   489  	timeout := time.After(coretesting.LongWait)
   490  	for {
   491  		select {
   492  		case <-timeout:
   493  			c.Fatalf("timed out waiting for provisioning")
   494  		case <-time.After(coretesting.ShortWait):
   495  			s.State.StartSync()
   496  		case _, ok := <-w.Changes():
   497  			c.Assert(ok, jc.IsTrue)
   498  			err := m.Refresh()
   499  			c.Assert(err, jc.ErrorIsNil)
   500  			if instId, err := m.InstanceId(); err == nil {
   501  				c.Logf("unit provisioned with instance %s", instId)
   502  				return m, instId
   503  			} else {
   504  				c.Check(err, jc.Satisfies, errors.IsNotProvisioned)
   505  			}
   506  		}
   507  	}
   508  }
   509  
// testUpgradeRequest bumps the model agent version past the agent's
// current tools and asserts that running the agent terminates with an
// UpgradeReadyError naming the old and new tools versions.
func (s *MachineSuite) testUpgradeRequest(c *gc.C, agent runner, tag string, currentTools *tools.Tools) {
	newVers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	// A higher patch level is enough to trigger an upgrade.
	newVers.Patch++
	newTools := envtesting.AssertUploadFakeToolsVersions(
		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), newVers)[0]
	err := s.State.SetModelAgentVersion(newVers.Number)
	c.Assert(err, jc.ErrorIsNil)
	err = runWithTimeout(agent)
	envtesting.CheckUpgraderReadyError(c, err, &upgrader.UpgradeReadyError{
		AgentName: tag,
		OldTools:  currentTools.Version,
		NewTools:  newTools.Version,
		DataDir:   s.DataDir(),
	})
}
   529  
   530  func (s *MachineSuite) TestUpgradeRequest(c *gc.C) {
   531  	m, _, currentTools := s.primeAgent(c, state.JobManageModel, state.JobHostUnits)
   532  	a := s.newAgent(c, m)
   533  	s.testUpgradeRequest(c, a, m.Tag().String(), currentTools)
   534  	c.Assert(a.isInitialUpgradeCheckPending(), jc.IsTrue)
   535  }
   536  
   537  func (s *MachineSuite) TestNoUpgradeRequired(c *gc.C) {
   538  	m, _, _ := s.primeAgent(c, state.JobManageModel, state.JobHostUnits)
   539  	a := s.newAgent(c, m)
   540  	done := make(chan error)
   541  	go func() { done <- a.Run(nil) }()
   542  	select {
   543  	case <-a.initialUpgradeCheckComplete.Unlocked():
   544  	case <-time.After(coretesting.LongWait):
   545  		c.Fatalf("timeout waiting for upgrade check")
   546  	}
   547  	defer a.Stop() // in case of failure
   548  	s.waitStopped(c, state.JobManageModel, a, done)
   549  	c.Assert(a.isInitialUpgradeCheckPending(), jc.IsFalse)
   550  }
   551  
   552  func (s *MachineSuite) waitStopped(c *gc.C, job state.MachineJob, a *MachineAgent, done chan error) {
   553  	err := a.Stop()
   554  	if job == state.JobManageModel {
   555  		// When shutting down, the API server can be shut down before
   556  		// the other workers that connect to it, so they get an error so
   557  		// they then die, causing Stop to return an error.  It's not
   558  		// easy to control the actual error that's received in this
   559  		// circumstance so we just log it rather than asserting that it
   560  		// is not nil.
   561  		if err != nil {
   562  			c.Logf("error shutting down state manager: %v", err)
   563  		}
   564  	} else {
   565  		c.Assert(err, jc.ErrorIsNil)
   566  	}
   567  
   568  	select {
   569  	case err := <-done:
   570  		c.Assert(err, jc.ErrorIsNil)
   571  	case <-time.After(coretesting.LongWait):
   572  		c.Fatalf("timed out waiting for agent to terminate")
   573  	}
   574  }
   575  
// assertJobWithState verifies that the given job requires direct state
// access, then runs an agent for that job and passes its config and
// opened state to the test function.
func (s *MachineSuite) assertJobWithState(
	c *gc.C,
	job state.MachineJob,
	test func(agent.Config, *state.State),
) {
	paramsJob := job.ToParams()
	if !paramsJob.NeedsState() {
		c.Fatalf("%v does not use state", paramsJob)
	}
	s.assertAgentOpensState(c, job, test)
}
   587  
// assertAgentOpensState asserts that a machine agent started with the
// given job opens a connection to state. The agent's configuration and
// the agent's state.State are then passed to the test function for
// further checking.
func (s *MachineSuite) assertAgentOpensState(c *gc.C, job state.MachineJob, test func(agent.Config, *state.State)) {
	stm, conf, _ := s.primeAgent(c, job)
	a := s.newAgent(c, stm)
	defer a.Stop()
	logger.Debugf("new agent %#v", a)

	// All state jobs currently also run an APIWorker, so no
	// need to check for that here, like in assertJobWithState.
	st, done := s.waitForOpenState(c, a)
	test(conf, st)
	s.waitStopped(c, job, a, done)
}
   603  
   604  func (s *MachineSuite) waitForOpenState(c *gc.C, a *MachineAgent) (*state.State, chan error) {
   605  	agentAPIs := make(chan *state.State, 1)
   606  	s.AgentSuite.PatchValue(&reportOpenedState, func(st *state.State) {
   607  		select {
   608  		case agentAPIs <- st:
   609  		default:
   610  		}
   611  	})
   612  
   613  	done := make(chan error)
   614  	go func() {
   615  		done <- a.Run(nil)
   616  	}()
   617  
   618  	select {
   619  	case agentAPI := <-agentAPIs:
   620  		c.Assert(agentAPI, gc.NotNil)
   621  		return agentAPI, done
   622  	case <-time.After(coretesting.LongWait):
   623  		c.Fatalf("API not opened")
   624  	}
   625  	panic("can't happen")
   626  }
   627  
   628  func (s *MachineSuite) TestManageModelServesAPI(c *gc.C) {
   629  	s.assertJobWithState(c, state.JobManageModel, func(conf agent.Config, agentState *state.State) {
   630  		apiInfo, ok := conf.APIInfo()
   631  		c.Assert(ok, jc.IsTrue)
   632  		st, err := api.Open(apiInfo, fastDialOpts)
   633  		c.Assert(err, jc.ErrorIsNil)
   634  		defer st.Close()
   635  		m, err := apimachiner.NewState(st).Machine(conf.Tag().(names.MachineTag))
   636  		c.Assert(err, jc.ErrorIsNil)
   637  		c.Assert(m.Life(), gc.Equals, params.Alive)
   638  	})
   639  }
   640  
// assertAgentSetsToolsVersion primes an agent with a different minor
// tools version, starts it, and waits for the agent to record the
// current tools version against its machine in state.
func (s *MachineSuite) assertAgentSetsToolsVersion(c *gc.C, job state.MachineJob) {
	vers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	// Prime with a bumped minor version so the agent has work to do.
	vers.Minor++
	m, _, _ := s.primeAgentVersion(c, vers, job)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Poll until the machine's recorded tools match the current version.
	timeout := time.After(coretesting.LongWait)
	for done := false; !done; {
		select {
		case <-timeout:
			c.Fatalf("timeout while waiting for agent version to be set")
		case <-time.After(coretesting.ShortWait):
			c.Log("Refreshing")
			err := m.Refresh()
			c.Assert(err, jc.ErrorIsNil)
			c.Log("Fetching agent tools")
			agentTools, err := m.AgentTools()
			c.Assert(err, jc.ErrorIsNil)
			c.Logf("(%v vs. %v)", agentTools.Version, jujuversion.Current)
			if agentTools.Version.Minor != jujuversion.Current.Minor {
				continue
			}
			c.Assert(agentTools.Version.Number, gc.DeepEquals, jujuversion.Current)
			done = true
		}
	}
}
   674  
// TestAgentSetsToolsVersionManageModel checks tools-version reporting
// for a model-manager agent.
func (s *MachineSuite) TestAgentSetsToolsVersionManageModel(c *gc.C) {
	s.assertAgentSetsToolsVersion(c, state.JobManageModel)
}
   678  
// TestAgentSetsToolsVersionHostUnits checks tools-version reporting
// for a unit-hosting agent.
func (s *MachineSuite) TestAgentSetsToolsVersionHostUnits(c *gc.C) {
	s.assertAgentSetsToolsVersion(c, state.JobHostUnits)
}
   682  
// TestManageModelRunsCleaner checks that the cleaner worker is running
// by destroying a service and waiting for its unit to be removed from
// state.
func (s *MachineSuite) TestManageModelRunsCleaner(c *gc.C) {
	s.assertJobWithState(c, state.JobManageModel, func(conf agent.Config, agentState *state.State) {
		// Create a service and unit, and destroy the service.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		unit, err := service.AddUnit()
		c.Assert(err, jc.ErrorIsNil)
		err = service.Destroy()
		c.Assert(err, jc.ErrorIsNil)

		// Check the unit was not yet removed.
		err = unit.Refresh()
		c.Assert(err, jc.ErrorIsNil)
		w := unit.Watch()
		defer worker.Stop(w)

		// Trigger a sync on the state used by the agent, and wait
		// for the unit to be removed.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for done := false; !done; {
			select {
			case <-timeout:
				c.Fatalf("unit not cleaned up")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				// NotFound means the cleaner has removed the unit;
				// any other error is a test failure.
				err := unit.Refresh()
				if errors.IsNotFound(err) {
					done = true
				} else {
					c.Assert(err, jc.ErrorIsNil)
				}
			}
		}
	})
}
   719  
// TestJobManageModelRunsMinUnitsWorker checks that the MinUnits worker
// is running by setting a minimum unit count on a service and waiting
// for the missing unit to be created.
func (s *MachineSuite) TestJobManageModelRunsMinUnitsWorker(c *gc.C) {
	s.assertJobWithState(c, state.JobManageModel, func(_ agent.Config, agentState *state.State) {
		// Ensure that the MinUnits worker is alive by doing a simple check
		// that it responds to state changes: add a service, set its minimum
		// number of units to one, wait for the worker to add the missing unit.
		service := s.AddTestingService(c, "wordpress", s.AddTestingCharm(c, "wordpress"))
		err := service.SetMinUnits(1)
		c.Assert(err, jc.ErrorIsNil)
		w := service.Watch()
		defer worker.Stop(w)

		// Trigger a sync on the state used by the agent, and wait for the unit
		// to be created.
		agentState.StartSync()
		timeout := time.After(coretesting.LongWait)
		for {
			select {
			case <-timeout:
				c.Fatalf("unit not created")
			case <-time.After(coretesting.ShortWait):
				s.State.StartSync()
			case <-w.Changes():
				units, err := service.AllUnits()
				c.Assert(err, jc.ErrorIsNil)
				if len(units) == 1 {
					return
				}
			}
		}
	})
}
   751  
// TestMachineAgentRunsAuthorisedKeysWorker checks that updating the
// model's authorized-keys config is eventually reflected in the SSH
// authorised keys on disk.
func (s *MachineSuite) TestMachineAgentRunsAuthorisedKeysWorker(c *gc.C) {
	//TODO(bogdanteleaga): Fix once we get authentication worker up on windows
	if runtime.GOOS == "windows" {
		c.Skip("bug 1403084: authentication worker not yet implemented on windows")
	}
	// Start the machine agent.
	m, _, _ := s.primeAgent(c, state.JobHostUnits)
	a := s.newAgent(c, m)
	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()

	// Update the keys in the environment.
	sshKey := sshtesting.ValidKeyOne.Key + " user@host"
	err := s.BackingState.UpdateModelConfig(map[string]interface{}{"authorized-keys": sshKey}, nil, nil)
	c.Assert(err, jc.ErrorIsNil)

	// Wait for ssh keys file to be updated.
	s.State.StartSync()
	timeout := time.After(coretesting.LongWait)
	// The worker rewrites the key comment with a "Juju:" prefix.
	sshKeyWithCommentPrefix := sshtesting.ValidKeyOne.Key + " Juju:user@host"
	for {
		select {
		case <-timeout:
			c.Fatalf("timeout while waiting for authorised ssh keys to change")
		case <-time.After(coretesting.ShortWait):
			s.State.StartSync()
			keys, err := ssh.ListKeys(authenticationworker.SSHUser, ssh.FullKeys)
			c.Assert(err, jc.ErrorIsNil)
			keysStr := strings.Join(keys, "\n")
			if sshKeyWithCommentPrefix != keysStr {
				continue
			}
			return
		}
	}
}
   788  
   789  func (s *MachineSuite) TestMachineAgentSymlinks(c *gc.C) {
   790  	stm, _, _ := s.primeAgent(c, state.JobManageModel)
   791  	a := s.newAgent(c, stm)
   792  	defer a.Stop()
   793  	_, done := s.waitForOpenState(c, a)
   794  
   795  	// Symlinks should have been created
   796  	for _, link := range []string{jujuRun, jujuDumpLogs} {
   797  		_, err := os.Stat(utils.EnsureBaseDir(a.rootDir, link))
   798  		c.Assert(err, jc.ErrorIsNil, gc.Commentf(link))
   799  	}
   800  
   801  	s.waitStopped(c, state.JobManageModel, a, done)
   802  }
   803  
// TestMachineAgentSymlinkJujuRunExists checks that stale symlinks
// pointing at the wrong target are replaced when the agent starts.
func (s *MachineSuite) TestMachineAgentSymlinkJujuRunExists(c *gc.C) {
	if runtime.GOOS == "windows" {
		// Cannot make symlink to nonexistent file on windows or
		// create a file point a symlink to it then remove it
		c.Skip("Cannot test this on windows")
	}

	stm, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, stm)
	defer a.Stop()

	// Pre-create the symlinks, but pointing to the incorrect location.
	links := []string{jujuRun, jujuDumpLogs}
	a.rootDir = c.MkDir()
	for _, link := range links {
		fullLink := utils.EnsureBaseDir(a.rootDir, link)
		c.Assert(os.MkdirAll(filepath.Dir(fullLink), os.FileMode(0755)), jc.ErrorIsNil)
		c.Assert(symlink.New("/nowhere/special", fullLink), jc.ErrorIsNil, gc.Commentf(link))
	}

	// Start the agent and wait for it be running.
	_, done := s.waitForOpenState(c, a)

	// juju-run symlink should have been recreated.
	for _, link := range links {
		fullLink := utils.EnsureBaseDir(a.rootDir, link)
		linkTarget, err := symlink.Read(fullLink)
		c.Assert(err, jc.ErrorIsNil)
		c.Assert(linkTarget, gc.Not(gc.Equals), "/nowhere/special", gc.Commentf(link))
	}

	s.waitStopped(c, state.JobManageModel, a, done)
}
   837  
   838  func (s *MachineSuite) TestMachineAgentUninstall(c *gc.C) {
   839  	m, ac, _ := s.primeAgent(c, state.JobHostUnits)
   840  	err := m.EnsureDead()
   841  	c.Assert(err, jc.ErrorIsNil)
   842  	a := s.newAgent(c, m)
   843  	err = runWithTimeout(a)
   844  	c.Assert(err, jc.ErrorIsNil)
   845  
   846  	// juju-run and juju-dumplogs symlinks should have been removed on
   847  	// termination.
   848  	for _, link := range []string{jujuRun, jujuDumpLogs} {
   849  		_, err = os.Stat(utils.EnsureBaseDir(a.rootDir, link))
   850  		c.Assert(err, jc.Satisfies, os.IsNotExist)
   851  	}
   852  
   853  	// data-dir should have been removed on termination
   854  	_, err = os.Stat(ac.DataDir())
   855  	c.Assert(err, jc.Satisfies, os.IsNotExist)
   856  }
   857  
   858  func (s *MachineSuite) TestMachineAgentRunsAPIAddressUpdaterWorker(c *gc.C) {
   859  	// Start the machine agent.
   860  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
   861  	a := s.newAgent(c, m)
   862  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   863  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   864  
   865  	// Update the API addresses.
   866  	updatedServers := [][]network.HostPort{
   867  		network.NewHostPorts(1234, "localhost"),
   868  	}
   869  	err := s.BackingState.SetAPIHostPorts(updatedServers)
   870  	c.Assert(err, jc.ErrorIsNil)
   871  
   872  	// Wait for config to be updated.
   873  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   874  		s.BackingState.StartSync()
   875  		if !attempt.HasNext() {
   876  			break
   877  		}
   878  		addrs, err := a.CurrentConfig().APIAddresses()
   879  		c.Assert(err, jc.ErrorIsNil)
   880  		if reflect.DeepEqual(addrs, []string{"localhost:1234"}) {
   881  			return
   882  		}
   883  	}
   884  	c.Fatalf("timeout while waiting for agent config to change")
   885  }
   886  
   887  func (s *MachineSuite) TestMachineAgentRunsDiskManagerWorker(c *gc.C) {
   888  	// Patch out the worker func before starting the agent.
   889  	started := newSignal()
   890  	newWorker := func(diskmanager.ListBlockDevicesFunc, diskmanager.BlockDeviceSetter) worker.Worker {
   891  		started.trigger()
   892  		return worker.NewNoOpWorker()
   893  	}
   894  	s.PatchValue(&diskmanager.NewWorker, newWorker)
   895  
   896  	// Start the machine agent.
   897  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
   898  	a := s.newAgent(c, m)
   899  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   900  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   901  	started.assertTriggered(c, "diskmanager worker to start")
   902  }
   903  
   904  func (s *MachineSuite) TestMongoUpgradeWorker(c *gc.C) {
   905  	// Patch out the worker func before starting the agent.
   906  	started := make(chan struct{})
   907  	newWorker := func(*state.State, string, mongoupgrader.StopMongo) (worker.Worker, error) {
   908  		close(started)
   909  		return worker.NewNoOpWorker(), nil
   910  	}
   911  	s.PatchValue(&newUpgradeMongoWorker, newWorker)
   912  
   913  	// Start the machine agent.
   914  	m, _, _ := s.primeAgent(c, state.JobManageModel)
   915  	a := s.newAgent(c, m)
   916  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   917  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   918  
   919  	// Wait for worker to be started.
   920  	s.State.StartSync()
   921  	select {
   922  	case <-started:
   923  	case <-time.After(coretesting.LongWait):
   924  		c.Fatalf("timeout while waiting for mongo upgrader worker to start")
   925  	}
   926  }
   927  
   928  func (s *MachineSuite) TestDiskManagerWorkerUpdatesState(c *gc.C) {
   929  	expected := []storage.BlockDevice{{DeviceName: "whatever"}}
   930  	s.PatchValue(&diskmanager.DefaultListBlockDevices, func() ([]storage.BlockDevice, error) {
   931  		return expected, nil
   932  	})
   933  
   934  	// Start the machine agent.
   935  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
   936  	a := s.newAgent(c, m)
   937  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   938  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   939  
   940  	// Wait for state to be updated.
   941  	s.BackingState.StartSync()
   942  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   943  		devices, err := s.BackingState.BlockDevices(m.MachineTag())
   944  		c.Assert(err, jc.ErrorIsNil)
   945  		if len(devices) > 0 {
   946  			c.Assert(devices, gc.HasLen, 1)
   947  			c.Assert(devices[0].DeviceName, gc.Equals, expected[0].DeviceName)
   948  			return
   949  		}
   950  	}
   951  	c.Fatalf("timeout while waiting for block devices to be recorded")
   952  }
   953  
// TestMachineAgentDoesNotRunMetadataWorkerForHostUnits checks that a
// unit-hosting machine never starts the image metadata update worker.
func (s *MachineSuite) TestMachineAgentDoesNotRunMetadataWorkerForHostUnits(c *gc.C) {
	s.checkMetadataWorkerNotRun(c, state.JobHostUnits, "can host units")
}
   957  
// TestMachineAgentDoesNotRunMetadataWorkerForNonSimpleStreamDependentProviders
// checks that a controller whose provider does not rely on simplestreams
// never starts the image metadata update worker.
func (s *MachineSuite) TestMachineAgentDoesNotRunMetadataWorkerForNonSimpleStreamDependentProviders(c *gc.C) {
	s.checkMetadataWorkerNotRun(c, state.JobManageModel, "has provider which doesn't depend on simple streams")
}
   961  
   962  func (s *MachineSuite) checkMetadataWorkerNotRun(c *gc.C, job state.MachineJob, suffix string) {
   963  	// Patch out the worker func before starting the agent.
   964  	started := newSignal()
   965  	newWorker := func(cl *imagemetadata.Client) worker.Worker {
   966  		started.trigger()
   967  		return worker.NewNoOpWorker()
   968  	}
   969  	s.PatchValue(&newMetadataUpdater, newWorker)
   970  
   971  	// Start the machine agent.
   972  	m, _, _ := s.primeAgent(c, job)
   973  	a := s.newAgent(c, m)
   974  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   975  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   976  	started.assertNotTriggered(c, startWorkerWait, "metadata update worker started")
   977  }
   978  
   979  func (s *MachineSuite) TestMachineAgentRunsMachineStorageWorker(c *gc.C) {
   980  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
   981  
   982  	started := newSignal()
   983  	newWorker := func(config storageprovisioner.Config) (worker.Worker, error) {
   984  		c.Check(config.Scope, gc.Equals, m.Tag())
   985  		c.Check(config.Validate(), jc.ErrorIsNil)
   986  		started.trigger()
   987  		return worker.NewNoOpWorker(), nil
   988  	}
   989  	s.PatchValue(&storageprovisioner.NewStorageProvisioner, newWorker)
   990  
   991  	// Start the machine agent.
   992  	a := s.newAgent(c, m)
   993  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   994  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   995  	started.assertTriggered(c, "storage worker to start")
   996  }
   997  
   998  func (s *MachineSuite) TestMachineAgentRunsCertificateUpdateWorkerForController(c *gc.C) {
   999  	started := newSignal()
  1000  	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
  1001  		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
  1002  	) worker.Worker {
  1003  		started.trigger()
  1004  		return worker.NewNoOpWorker()
  1005  	}
  1006  	s.PatchValue(&newCertificateUpdater, newUpdater)
  1007  
  1008  	// Start the machine agent.
  1009  	m, _, _ := s.primeAgent(c, state.JobManageModel)
  1010  	a := s.newAgent(c, m)
  1011  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1012  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1013  	started.assertTriggered(c, "certificate to be updated")
  1014  }
  1015  
  1016  func (s *MachineSuite) TestMachineAgentDoesNotRunsCertificateUpdateWorkerForNonController(c *gc.C) {
  1017  	started := newSignal()
  1018  	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
  1019  		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
  1020  	) worker.Worker {
  1021  		started.trigger()
  1022  		return worker.NewNoOpWorker()
  1023  	}
  1024  	s.PatchValue(&newCertificateUpdater, newUpdater)
  1025  
  1026  	// Start the machine agent.
  1027  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1028  	a := s.newAgent(c, m)
  1029  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1030  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1031  	started.assertNotTriggered(c, startWorkerWait, "certificate was updated")
  1032  }
  1033  
// TestCertificateUpdateWorkerUpdatesCertificate checks that running a
// controller agent causes the server certificate to be regenerated so
// that its SAN IP list becomes exactly ["0.1.2.3"] (presumably the
// machine address supplied by the test environment — confirm against
// the suite setup).
func (s *MachineSuite) TestCertificateUpdateWorkerUpdatesCertificate(c *gc.C) {
	// Set up the machine agent.
	m, _, _ := s.primeAgent(c, state.JobManageModel)
	a := s.newAgent(c, m)
	a.ReadConfig(names.NewMachineTag(m.Id()).String())

	// Set up check that certificate has been updated: poll the agent's
	// current config and close `updated` once the cert matches.
	updated := make(chan struct{})
	go func() {
		for {
			stateInfo, _ := a.CurrentConfig().StateServingInfo()
			srvCert, err := cert.ParseCert(stateInfo.Cert)
			// Check (not Assert) so a parse failure stops this
			// goroutine without panicking the whole test process.
			if !c.Check(err, jc.ErrorIsNil) {
				break
			}
			sanIPs := make([]string, len(srvCert.IPAddresses))
			for i, ip := range srvCert.IPAddresses {
				sanIPs[i] = ip.String()
			}
			if len(sanIPs) == 1 && sanIPs[0] == "0.1.2.3" {
				close(updated)
				break
			}
			// Pause between polls to avoid spinning.
			time.Sleep(100 * time.Millisecond)
		}
	}()

	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
	s.assertChannelActive(c, updated, "certificate to be updated")
}
  1065  
  1066  func (s *MachineSuite) TestCertificateDNSUpdated(c *gc.C) {
  1067  	// Disable the certificate work so it doesn't update the certificate.
  1068  	newUpdater := func(certupdater.AddressWatcher, certupdater.StateServingInfoGetter, certupdater.ControllerConfigGetter,
  1069  		certupdater.APIHostPortsGetter, certupdater.StateServingInfoSetter,
  1070  	) worker.Worker {
  1071  		return worker.NewNoOpWorker()
  1072  	}
  1073  	s.PatchValue(&newCertificateUpdater, newUpdater)
  1074  
  1075  	// Set up the machine agent.
  1076  	m, _, _ := s.primeAgent(c, state.JobManageModel)
  1077  	a := s.newAgent(c, m)
  1078  
  1079  	// Set up check that certificate has been updated when the agent starts.
  1080  	updated := make(chan struct{})
  1081  	expectedDnsNames := set.NewStrings("local", "juju-apiserver", "juju-mongodb")
  1082  	go func() {
  1083  		for {
  1084  			stateInfo, _ := a.CurrentConfig().StateServingInfo()
  1085  			srvCert, err := cert.ParseCert(stateInfo.Cert)
  1086  			c.Assert(err, jc.ErrorIsNil)
  1087  			certDnsNames := set.NewStrings(srvCert.DNSNames...)
  1088  			if !expectedDnsNames.Difference(certDnsNames).IsEmpty() {
  1089  				continue
  1090  			}
  1091  			pemContent, err := ioutil.ReadFile(filepath.Join(s.DataDir(), "server.pem"))
  1092  			c.Assert(err, jc.ErrorIsNil)
  1093  			if string(pemContent) == stateInfo.Cert+"\n"+stateInfo.PrivateKey {
  1094  				close(updated)
  1095  				break
  1096  			}
  1097  			time.Sleep(10 * time.Millisecond)
  1098  		}
  1099  	}()
  1100  
  1101  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1102  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1103  	s.assertChannelActive(c, updated, "certificate to be updated")
  1104  }
  1105  
  1106  func (s *MachineSuite) setupIgnoreAddresses(c *gc.C, expectedIgnoreValue bool) chan bool {
  1107  	ignoreAddressCh := make(chan bool, 1)
  1108  	s.AgentSuite.PatchValue(&machiner.NewMachiner, func(cfg machiner.Config) (worker.Worker, error) {
  1109  		select {
  1110  		case ignoreAddressCh <- cfg.ClearMachineAddressesOnStart:
  1111  		default:
  1112  		}
  1113  
  1114  		// The test just cares that NewMachiner is called with the correct
  1115  		// value, nothing else is done with the worker.
  1116  		return newDummyWorker(), nil
  1117  	})
  1118  
  1119  	attrs := coretesting.Attrs{"ignore-machine-addresses": expectedIgnoreValue}
  1120  	err := s.BackingState.UpdateModelConfig(attrs, nil, nil)
  1121  	c.Assert(err, jc.ErrorIsNil)
  1122  	return ignoreAddressCh
  1123  }
  1124  
  1125  func (s *MachineSuite) TestMachineAgentIgnoreAddresses(c *gc.C) {
  1126  	for _, expectedIgnoreValue := range []bool{true, false} {
  1127  		ignoreAddressCh := s.setupIgnoreAddresses(c, expectedIgnoreValue)
  1128  
  1129  		m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1130  		a := s.newAgent(c, m)
  1131  		defer a.Stop()
  1132  		doneCh := make(chan error)
  1133  		go func() {
  1134  			doneCh <- a.Run(nil)
  1135  		}()
  1136  
  1137  		select {
  1138  		case ignoreMachineAddresses := <-ignoreAddressCh:
  1139  			if ignoreMachineAddresses != expectedIgnoreValue {
  1140  				c.Fatalf("expected ignore-machine-addresses = %v, got = %v", expectedIgnoreValue, ignoreMachineAddresses)
  1141  			}
  1142  		case <-time.After(coretesting.LongWait):
  1143  			c.Fatalf("timed out waiting for the machiner to start")
  1144  		}
  1145  		s.waitStopped(c, state.JobHostUnits, a, doneCh)
  1146  	}
  1147  }
  1148  
// TestMachineAgentIgnoreAddressesContainer checks that even when the
// model's ignore-machine-addresses is true, a container's machiner is
// configured NOT to clear machine addresses on start.
func (s *MachineSuite) TestMachineAgentIgnoreAddressesContainer(c *gc.C) {
	// Request ignore-machine-addresses=true; the channel yields the
	// ClearMachineAddressesOnStart value the machiner is built with.
	ignoreAddressCh := s.setupIgnoreAddresses(c, true)

	// Create a container nested inside a host machine.
	parent, err := s.State.AddMachine("quantal", state.JobHostUnits)
	c.Assert(err, jc.ErrorIsNil)
	m, err := s.State.AddMachineInsideMachine(
		state.MachineTemplate{
			Series: "trusty",
			Jobs:   []state.MachineJob{state.JobHostUnits},
		},
		parent.Id(),
		instance.LXD,
	)
	c.Assert(err, jc.ErrorIsNil)

	// Prime and start an agent for the container, at the current
	// binary version for this host.
	vers := version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.HostSeries(),
	}
	s.primeAgentWithMachine(c, m, vers)
	a := s.newAgent(c, m)
	defer a.Stop()
	doneCh := make(chan error)
	go func() {
		doneCh <- a.Run(nil)
	}()

	// For containers the flag must be overridden to false regardless
	// of the model config set above.
	select {
	case ignoreMachineAddresses := <-ignoreAddressCh:
		if ignoreMachineAddresses {
			c.Fatalf("expected ignore-machine-addresses = false, got = true")
		}
	case <-time.After(coretesting.LongWait):
		c.Fatalf("timed out waiting for the machiner to start")
	}
	s.waitStopped(c, state.JobHostUnits, a, doneCh)
}
  1187  
  1188  func (s *MachineSuite) TestMachineAgentSetsPrepareRestore(c *gc.C) {
  1189  	// Start the machine agent.
  1190  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1191  	a := s.newAgent(c, m)
  1192  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1193  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1194  	c.Check(a.IsRestorePreparing(), jc.IsFalse)
  1195  	c.Check(a.IsRestoreRunning(), jc.IsFalse)
  1196  	err := a.PrepareRestore()
  1197  	c.Assert(err, jc.ErrorIsNil)
  1198  	c.Assert(a.IsRestorePreparing(), jc.IsTrue)
  1199  	c.Assert(a.IsRestoreRunning(), jc.IsFalse)
  1200  	err = a.PrepareRestore()
  1201  	c.Assert(err, gc.ErrorMatches, "already in restore mode")
  1202  }
  1203  
  1204  func (s *MachineSuite) TestMachineAgentSetsRestoreInProgress(c *gc.C) {
  1205  	// Start the machine agent.
  1206  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1207  	a := s.newAgent(c, m)
  1208  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1209  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1210  	c.Check(a.IsRestorePreparing(), jc.IsFalse)
  1211  	c.Check(a.IsRestoreRunning(), jc.IsFalse)
  1212  	err := a.PrepareRestore()
  1213  	c.Assert(err, jc.ErrorIsNil)
  1214  	c.Assert(a.IsRestorePreparing(), jc.IsTrue)
  1215  	err = a.BeginRestore()
  1216  	c.Assert(err, jc.ErrorIsNil)
  1217  	c.Assert(a.IsRestoreRunning(), jc.IsTrue)
  1218  	err = a.BeginRestore()
  1219  	c.Assert(err, gc.ErrorMatches, "already restoring")
  1220  }
  1221  
  1222  func (s *MachineSuite) TestMachineAgentRestoreRequiresPrepare(c *gc.C) {
  1223  	// Start the machine agent.
  1224  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1225  	a := s.newAgent(c, m)
  1226  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1227  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1228  	c.Check(a.IsRestorePreparing(), jc.IsFalse)
  1229  	c.Check(a.IsRestoreRunning(), jc.IsFalse)
  1230  	err := a.BeginRestore()
  1231  	c.Assert(err, gc.ErrorMatches, "not in restore mode, cannot begin restoration")
  1232  	c.Assert(a.IsRestoreRunning(), jc.IsFalse)
  1233  }
  1234  
  1235  func (s *MachineSuite) TestMachineWorkers(c *gc.C) {
  1236  	tracker := NewEngineTracker()
  1237  	instrumented := TrackMachines(c, tracker, machineManifolds)
  1238  	s.PatchValue(&machineManifolds, instrumented)
  1239  
  1240  	m, _, _ := s.primeAgent(c, state.JobHostUnits)
  1241  	a := s.newAgent(c, m)
  1242  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
  1243  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
  1244  
  1245  	// Wait for it to stabilise, running as normal.
  1246  	matcher := NewWorkerMatcher(c, tracker, a.Tag().String(),
  1247  		append(alwaysMachineWorkers, notMigratingMachineWorkers...))
  1248  	WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
  1249  }
  1250  
  1251  func (s *MachineSuite) TestControllerModelWorkers(c *gc.C) {
  1252  	uuid := s.BackingState.ModelUUID()
  1253  
  1254  	tracker := NewEngineTracker()
  1255  	instrumented := TrackModels(c, tracker, modelManifolds)
  1256  	s.PatchValue(&modelManifolds, instrumented)
  1257  
  1258  	matcher := NewWorkerMatcher(c, tracker, uuid,
  1259  		append(alwaysModelWorkers, aliveModelWorkers...))
  1260  	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
  1261  		WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
  1262  	})
  1263  }
  1264  
  1265  func (s *MachineSuite) TestHostedModelWorkers(c *gc.C) {
  1266  	// The dummy provider blows up in the face of multi-model
  1267  	// scenarios so patch in a minimal environs.Environ that's good
  1268  	// enough to allow the model workers to run.
  1269  	s.PatchValue(&newEnvirons, func(environs.OpenParams) (environs.Environ, error) {
  1270  		return &minModelWorkersEnviron{}, nil
  1271  	})
  1272  
  1273  	st, closer := s.setUpNewModel(c)
  1274  	defer closer()
  1275  	uuid := st.ModelUUID()
  1276  
  1277  	tracker := NewEngineTracker()
  1278  	instrumented := TrackModels(c, tracker, modelManifolds)
  1279  	s.PatchValue(&modelManifolds, instrumented)
  1280  
  1281  	matcher := NewWorkerMatcher(c, tracker, uuid,
  1282  		append(alwaysModelWorkers, aliveModelWorkers...))
  1283  	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
  1284  		WaitMatch(c, matcher.Check, ReallyLongWait, st.StartSync)
  1285  	})
  1286  }
  1287  
// TestMigratingModelWorkers checks that a model with an active migration
// runs the migrating worker set rather than the normal live set.
func (s *MachineSuite) TestMigratingModelWorkers(c *gc.C) {
	st, closer := s.setUpNewModel(c)
	defer closer()
	uuid := st.ModelUUID()

	tracker := NewEngineTracker()

	// Replace the real migrationmaster worker with a fake one which
	// does nothing. This is required to make this test be reliable as
	// the environment required for the migrationmaster to operate
	// correctly is too involved to set up from here.
	//
	// TODO(mjs) - an alternative might be to provide a fake Facade
	// and api.Open to the real migrationmaster but this test is
	// awfully far away from the low level details of the worker.
	origModelManifolds := modelManifolds
	modelManifoldsDisablingMigrationMaster := func(config model.ManifoldsConfig) dependency.Manifolds {
		config.NewMigrationMaster = func(config migrationmaster.Config) (worker.Worker, error) {
			return &nullWorker{}, nil
		}
		return origModelManifolds(config)
	}
	instrumented := TrackModels(c, tracker, modelManifoldsDisablingMigrationMaster)
	s.PatchValue(&modelManifolds, instrumented)

	// Create an active migration so the model switches over to the
	// migrating worker set. The target details are placeholders.
	targetControllerTag := names.NewControllerTag(utils.MustNewUUID().String())
	_, err := st.CreateMigration(state.MigrationSpec{
		InitiatedBy: names.NewUserTag("admin"),
		TargetInfo: migration.TargetInfo{
			ControllerTag: targetControllerTag,
			Addrs:         []string{"1.2.3.4:5555"},
			CACert:        "cert",
			AuthTag:       names.NewUserTag("user"),
			Password:      "password",
		},
	})
	c.Assert(err, jc.ErrorIsNil)

	// Wait for the migrating worker set to stabilise.
	matcher := NewWorkerMatcher(c, tracker, uuid,
		append(alwaysModelWorkers, migratingModelWorkers...))
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		WaitMatch(c, matcher.Check, ReallyLongWait, st.StartSync)
	})
}
  1332  
// TestDyingModelCleanedUp checks that destroying a hosted model causes
// the controller's workers to tear it down until it is removed from
// state entirely.
func (s *MachineSuite) TestDyingModelCleanedUp(c *gc.C) {
	st, closer := s.setUpNewModel(c)
	defer closer()

	timeout := time.After(ReallyLongWait)
	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
		model, err := st.Model()
		c.Assert(err, jc.ErrorIsNil)
		watch := model.Watch()
		defer workertest.CleanKill(c, watch)

		// Destroy the model, then watch it until Refresh reports
		// not-found (i.e. fully removed).
		err = model.Destroy()
		c.Assert(err, jc.ErrorIsNil)
		for {
			select {
			case <-watch.Changes():
				err := model.Refresh()
				cause := errors.Cause(err)
				if err == nil {
					continue // still there
				} else if errors.IsNotFound(cause) {
					return // successfully removed
				}
				c.Assert(err, jc.ErrorIsNil) // guaranteed fail
			case <-time.After(coretesting.ShortWait):
				// Nudge the watcher along periodically.
				st.StartSync()
			case <-timeout:
				c.Fatalf("timed out waiting for workers")
			}
		}
	})
}
  1365  
  1366  func (s *MachineSuite) TestModelWorkersRespectSingularResponsibilityFlag(c *gc.C) {
  1367  
  1368  	// Grab responsibility for the model on behalf of another machine.
  1369  	claimer := s.BackingState.SingularClaimer()
  1370  	uuid := s.BackingState.ModelUUID()
  1371  	err := claimer.Claim(uuid, "machine-999-lxd-99", time.Hour)
  1372  	c.Assert(err, jc.ErrorIsNil)
  1373  
  1374  	// Then run a normal model-tracking test, just checking for
  1375  	// a different set of workers.
  1376  	tracker := NewEngineTracker()
  1377  	instrumented := TrackModels(c, tracker, modelManifolds)
  1378  	s.PatchValue(&modelManifolds, instrumented)
  1379  
  1380  	matcher := NewWorkerMatcher(c, tracker, uuid, alwaysModelWorkers)
  1381  	s.assertJobWithState(c, state.JobManageModel, func(agent.Config, *state.State) {
  1382  		WaitMatch(c, matcher.Check, coretesting.LongWait, s.BackingState.StartSync)
  1383  	})
  1384  }
  1385  
  1386  func (s *MachineSuite) setUpNewModel(c *gc.C) (newSt *state.State, closer func()) {
  1387  	// Create a new environment, tests can now watch if workers start for it.
  1388  	newSt = s.Factory.MakeModel(c, nil)
  1389  	return newSt, func() {
  1390  		err := newSt.Close()
  1391  		c.Check(err, jc.ErrorIsNil)
  1392  	}
  1393  }
  1394  
  1395  func (s *MachineSuite) TestReplicasetInitForNewController(c *gc.C) {
  1396  	if runtime.GOOS == "windows" {
  1397  		c.Skip("controllers on windows aren't supported")
  1398  	}
  1399  
  1400  	s.fakeEnsureMongo.ServiceInstalled = false
  1401  
  1402  	m, _, _ := s.primeAgent(c, state.JobManageModel)
  1403  	a := s.newAgent(c, m)
  1404  	agentConfig := a.CurrentConfig()
  1405  
  1406  	err := a.ensureMongoServer(agentConfig)
  1407  	c.Assert(err, jc.ErrorIsNil)
  1408  
  1409  	c.Assert(s.fakeEnsureMongo.EnsureCount, gc.Equals, 1)
  1410  	c.Assert(s.fakeEnsureMongo.InitiateCount, gc.Equals, 0)
  1411  }
  1412  
// nullWorker is a degenerate worker.Worker: it does no work and simply
// terminates when killed. Used to stand in for real workers in tests.
type nullWorker struct {
	tomb tomb.Tomb
}
  1416  
// Kill terminates the worker immediately with a nil error.
// NOTE(review): tomb.Done panics if invoked more than once, so this
// assumes Kill is called at most once per worker — confirm callers
// honour that.
func (w *nullWorker) Kill() {
	w.tomb.Kill(nil)
	w.tomb.Done()
}
  1421  
// Wait blocks until the worker has been killed and returns the error it
// was killed with (always nil for nullWorker — see Kill).
func (w *nullWorker) Wait() error {
	return w.tomb.Wait()
}