github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/upgradesteps/worker_test.go

// Copyright 2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package upgradesteps

import (
	"fmt"
	"time"

	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/os/series"
	jc "github.com/juju/testing/checkers"
	"github.com/juju/utils"
	"github.com/juju/utils/arch"
	"github.com/juju/version"
	gc "gopkg.in/check.v1"
	"gopkg.in/juju/names.v2"
	"gopkg.in/juju/worker.v1"

	"github.com/juju/juju/agent"
	cmdutil "github.com/juju/juju/cmd/jujud/util"
	"github.com/juju/juju/core/constraints"
	"github.com/juju/juju/core/instance"
	"github.com/juju/juju/core/status"
	"github.com/juju/juju/environs"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/multiwatcher"
	"github.com/juju/juju/state/stateenvirons"
	statetesting "github.com/juju/juju/state/testing"
	coretesting "github.com/juju/juju/testing"
	"github.com/juju/juju/testing/factory"
	"github.com/juju/juju/upgrades"
	jujuversion "github.com/juju/juju/version"
	"github.com/juju/juju/worker/gate"
)

// TODO(mjs) - these tests are too tightly coupled to the
// implementation. They needn't be internal tests.

type UpgradeSuite struct {
	statetesting.StateSuite

	oldVersion      version.Binary
	logWriter       loggo.TestWriter
	connectionDead  bool
	machineIsMaster bool
	preUpgradeError bool
}

var _ = gc.Suite(&UpgradeSuite{})

const fails = true
const succeeds = false

func (s *UpgradeSuite) SetUpTest(c *gc.C) {
	s.StateSuite.SetUpTest(c)

	s.preUpgradeError = false
	// Most of these tests normally finish sub-second on a fast machine.
	// If any given test hits a minute, we have almost certainly become
	// wedged, so dump the logs.
	coretesting.DumpTestLogsAfter(time.Minute, c, s)

	s.oldVersion = version.Binary{
		Number: jujuversion.Current,
		Arch:   arch.HostArch(),
		Series: series.MustHostSeries(),
	}
	s.oldVersion.Major = 1
	s.oldVersion.Minor = 16

	// Don't wait so long in tests.
	s.PatchValue(&UpgradeStartTimeoutMaster, 50*time.Millisecond)
	s.PatchValue(&UpgradeStartTimeoutSecondary, 60*time.Millisecond)

	// Allow tests to make the API connection appear to be dead.
	s.connectionDead = false
	s.PatchValue(&cmdutil.ConnectionIsDead, func(loggo.Logger, cmdutil.Breakable) bool {
		return s.connectionDead
	})

	s.machineIsMaster = true
	fakeIsMachineMaster := func(*state.StatePool, string) (bool, error) {
		return s.machineIsMaster, nil
	}
	s.PatchValue(&IsMachineMaster, fakeIsMachineMaster)
}

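// captureLogs registers a test log writer so that assertions can be
// made about the messages the upgrade worker emits.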
func (s *UpgradeSuite) captureLogs(c *gc.C) {
	c.Assert(loggo.RegisterWriter("upgrade-tests", &s.logWriter), gc.IsNil)
	s.AddCleanup(func(*gc.C) {
		loggo.RemoveWriter("upgrade-tests")
		s.logWriter.Clear()
	})
}

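// countUpgradeAttempts patches PerformUpgrade with a stub that returns
// upgradeErr and counts its invocations, returning a pointer to the
// counter.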
func (s *UpgradeSuite) countUpgradeAttempts(upgradeErr error) *int {
	count := 0
	s.PatchValue(&PerformUpgrade, func(version.Number, []upgrades.Target, upgrades.Context) error {
		count++
		return upgradeErr
	})
	return &count
}

func (s *UpgradeSuite) TestNewLockWhenNoUpgradeRequired(c *gc.C) {
	// Set the agent's upgradedToVersion to jujuversion.Current,
	// to simulate the upgrade steps having been run already.
	initialVersion := jujuversion.Current
	config := NewFakeConfigSetter(names.NewMachineTag("0"), initialVersion)

	lock := NewLock(config)

	// Upgrade steps have already been run.
	c.Assert(lock.IsUnlocked(), jc.IsTrue)
}

func (s *UpgradeSuite) TestNewLockWhenUpgradeRequired(c *gc.C) {
	// Set the agent's upgradedToVersion so that upgrade steps are required.
	initialVersion := version.MustParse("1.16.0")
	config := NewFakeConfigSetter(names.NewMachineTag("0"), initialVersion)

	lock := NewLock(config)

	c.Assert(lock.IsUnlocked(), jc.IsFalse)
	// The agent's version should NOT have been updated.
	c.Assert(config.Version, gc.Equals, initialVersion)
}

func (s *UpgradeSuite) TestRetryStrategy(c *gc.C) {
	retries := getUpgradeRetryStrategy()
	c.Assert(retries.Delay, gc.Equals, 2*time.Minute)
	c.Assert(retries.Min, gc.Equals, 5)
}

func (s *UpgradeSuite) TestNoUpgradeNecessary(c *gc.C) {
	attemptsP := s.countUpgradeAttempts(nil)
	s.captureLogs(c)
	s.oldVersion.Number = jujuversion.Current // nothing to do

	workerErr, config, _, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	c.Check(workerErr, gc.IsNil)
	c.Check(*attemptsP, gc.Equals, 0)
	c.Check(config.Version, gc.Equals, jujuversion.Current)
	c.Check(doneLock.IsUnlocked(), jc.IsTrue)
}

func (s *UpgradeSuite) TestNoUpgradeNecessaryIgnoresBuildNumbers(c *gc.C) {
	attemptsP := s.countUpgradeAttempts(nil)
	s.captureLogs(c)
	s.oldVersion.Number = jujuversion.Current
	s.oldVersion.Build = 1 // Ensure there's a build number mismatch.

	workerErr, config, _, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	c.Check(workerErr, gc.IsNil)
	c.Check(*attemptsP, gc.Equals, 0)
	c.Check(config.Version, gc.Equals, s.oldVersion.Number)
	c.Check(doneLock.IsUnlocked(), jc.IsTrue)
}

func (s *UpgradeSuite) TestUpgradeStepsFailure(c *gc.C) {
	// This test checks what happens when every upgrade attempt fails.
	// A number of retries should be observed and the agent should end
	// up in a state where it is still running but is reporting an
	// error and the upgrade is not flagged as having completed (which
	// prevents most of the agent's workers from running and keeps the
	// API in restricted mode).

	attemptsP := s.countUpgradeAttempts(errors.New("boom"))
	s.captureLogs(c)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	// The worker shouldn't return an error so that the worker and
	// agent keep running.
	c.Check(workerErr, gc.IsNil)

	c.Check(*attemptsP, gc.Equals, maxUpgradeRetries)
	c.Check(config.Version, gc.Equals, s.oldVersion.Number) // Upgrade didn't finish
	c.Assert(statusCalls, jc.DeepEquals,
		s.makeExpectedStatusCalls(maxUpgradeRetries-1, fails, "boom"))
	c.Assert(s.logWriter.Log(), jc.LogMatches,
		s.makeExpectedUpgradeLogs(maxUpgradeRetries-1, "hostMachine", fails, "boom"))
	c.Assert(doneLock.IsUnlocked(), jc.IsFalse)
}

func (s *UpgradeSuite) TestUpgradeStepsRetries(c *gc.C) {
	// This test checks what happens when the first upgrade attempt
	// fails but the following one succeeds. The final state should be
	// the same as for an upgrade that succeeded on the first attempt.
	attempts := 0
	fail := true
	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
		attempts++
		if fail {
			fail = false
			return errors.New("boom")
		}
		return nil
	}
	s.PatchValue(&PerformUpgrade, fakePerformUpgrade)
	s.captureLogs(c)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	c.Check(workerErr, gc.IsNil)
	c.Check(attempts, gc.Equals, 2)
	c.Check(config.Version, gc.Equals, jujuversion.Current) // Upgrade finished
	c.Assert(statusCalls, jc.DeepEquals, s.makeExpectedStatusCalls(1, succeeds, "boom"))
	c.Assert(s.logWriter.Log(), jc.LogMatches, s.makeExpectedUpgradeLogs(1, "hostMachine", succeeds, "boom"))
	c.Check(doneLock.IsUnlocked(), jc.IsTrue)
}

func (s *UpgradeSuite) TestOtherUpgradeRunFailure(c *gc.C) {
	// This test checks what happens when something other than the
	// upgrade steps themselves fails, ensuring the failure is logged
	// and the agent status is updated.

	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
		// Delete UpgradeInfo for the upgrade so that finaliseUpgrade() will fail.
		s.State.ClearUpgradeInfo()
		return nil
	}
	s.PatchValue(&PerformUpgrade, fakePerformUpgrade)
	s.Factory.MakeMachine(c, &factory.MachineParams{
		Jobs: []state.MachineJob{state.JobManageModel},
	})
	s.captureLogs(c)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobManageModel)

	c.Check(workerErr, gc.IsNil)
	c.Check(config.Version, gc.Equals, jujuversion.Current) // Upgrade almost finished
	failReason := `upgrade done but: cannot set upgrade status to "finishing": ` +
		`Another status change may have occurred concurrently`
	c.Assert(statusCalls, jc.DeepEquals,
		s.makeExpectedStatusCalls(0, fails, failReason))
	c.Assert(s.logWriter.Log(), jc.LogMatches,
		s.makeExpectedUpgradeLogs(0, "databaseMaster", fails, failReason))
	c.Assert(doneLock.IsUnlocked(), jc.IsFalse)
}

func (s *UpgradeSuite) TestAPIConnectionFailure(c *gc.C) {
	// This test checks what happens when an upgrade fails because the
	// connection to mongo has gone away. This will happen when the
	// mongo master changes. In this case we want the upgrade worker
	// to return immediately without further retries. The error should
	// be returned by the worker so that the agent will restart.

	attemptsP := s.countUpgradeAttempts(errors.New("boom"))
	s.connectionDead = true // Make the connection to state appear to be dead
	s.captureLogs(c)

	workerErr, config, _, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	c.Check(workerErr, gc.ErrorMatches, "API connection lost during upgrade: boom")
	c.Check(*attemptsP, gc.Equals, 1)
	c.Check(config.Version, gc.Equals, s.oldVersion.Number) // Upgrade didn't finish
	c.Assert(doneLock.IsUnlocked(), jc.IsFalse)
}

func (s *UpgradeSuite) TestAbortWhenOtherControllerDoesntStartUpgrade(c *gc.C) {
	// This test checks what happens when a controller is upgrading and
	// one of the other controllers doesn't signal that it is ready in
	// time.

	err := s.State.SetModelAgentVersion(jujuversion.Current, false)
	c.Assert(err, jc.ErrorIsNil)

	// The master controller in this scenario is functionally tested
	// elsewhere.
	s.machineIsMaster = false

	s.create3Controllers(c)
	s.captureLogs(c)
	attemptsP := s.countUpgradeAttempts(nil)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobManageModel)

	c.Check(workerErr, gc.IsNil)
	c.Check(*attemptsP, gc.Equals, 0)
	c.Check(config.Version, gc.Equals, s.oldVersion.Number) // Upgrade didn't happen
	c.Assert(doneLock.IsUnlocked(), jc.IsFalse)

	// The model agent-version should still be the new version.
	// It's up to the master to trigger the rollback.
	s.assertEnvironAgentVersion(c, jujuversion.Current)

	causeMsg := " timed out after 60ms"
	c.Assert(s.logWriter.Log(), jc.LogMatches, []jc.SimpleMessage{
		{loggo.INFO, "waiting for other controllers to be ready for upgrade"},
		{loggo.ERROR, "aborted wait for other controllers: timed out after 60ms"},
		{loggo.ERROR, `upgrade from .+ to .+ for "machine-0" failed \(giving up\): ` +
			"aborted wait for other controllers:" + causeMsg},
	})
	c.Assert(statusCalls, jc.DeepEquals, []StatusCall{{
		status.Error,
		fmt.Sprintf(
			"upgrade to %s failed (giving up): aborted wait for other controllers:"+causeMsg,
			jujuversion.Current),
	}})
}

func (s *UpgradeSuite) TestSuccessMaster(c *gc.C) {
	// This test checks what happens when an upgrade works on the
	// first attempt on a master controller.
	s.machineIsMaster = true
	info := s.checkSuccess(c, "databaseMaster", func(*state.UpgradeInfo) {})
	c.Assert(info.Status(), gc.Equals, state.UpgradeFinishing)
}

func (s *UpgradeSuite) TestSuccessSecondary(c *gc.C) {
	// This test checks what happens when an upgrade works on the
	// first attempt on a secondary controller.
	s.machineIsMaster = false
	mungeInfo := func(info *state.UpgradeInfo) {
		// Indicate that the master is done.
		err := info.SetStatus(state.UpgradeRunning)
		c.Assert(err, jc.ErrorIsNil)
		err = info.SetStatus(state.UpgradeFinishing)
		c.Assert(err, jc.ErrorIsNil)
	}
	s.checkSuccess(c, "controller", mungeInfo)
}

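// checkSuccess runs the upgrade worker against a three-controller
// setup, asserts that the upgrade completes on the first attempt, and
// returns the resulting UpgradeInfo.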
func (s *UpgradeSuite) checkSuccess(c *gc.C, target string, mungeInfo func(*state.UpgradeInfo)) *state.UpgradeInfo {
	_, machineIdB, machineIdC := s.create3Controllers(c)

	// Indicate that machines B and C are ready to upgrade.
	vPrevious := s.oldVersion.Number
	vNext := jujuversion.Current
	info, err := s.State.EnsureUpgradeInfo(machineIdB, vPrevious, vNext)
	c.Assert(err, jc.ErrorIsNil)
	_, err = s.State.EnsureUpgradeInfo(machineIdC, vPrevious, vNext)
	c.Assert(err, jc.ErrorIsNil)

	mungeInfo(info)

	attemptsP := s.countUpgradeAttempts(nil)
	s.captureLogs(c)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobManageModel)

	c.Check(workerErr, gc.IsNil)
	c.Check(*attemptsP, gc.Equals, 1)
	c.Check(config.Version, gc.Equals, jujuversion.Current) // Upgrade finished
	c.Assert(statusCalls, jc.DeepEquals, s.makeExpectedStatusCalls(0, succeeds, ""))
	c.Assert(s.logWriter.Log(), jc.LogMatches, s.makeExpectedUpgradeLogs(0, target, succeeds, ""))
	c.Check(doneLock.IsUnlocked(), jc.IsTrue)

	err = info.Refresh()
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(info.ControllersDone(), jc.DeepEquals, []string{"0"})
	return info
}

func (s *UpgradeSuite) TestJobsToTargets(c *gc.C) {
	check := func(jobs []multiwatcher.MachineJob, isMaster bool, expectedTargets ...upgrades.Target) {
		c.Assert(jobsToTargets(jobs, isMaster), jc.SameContents, expectedTargets)
	}

	check([]multiwatcher.MachineJob{multiwatcher.JobHostUnits}, false, upgrades.HostMachine)
	check([]multiwatcher.MachineJob{multiwatcher.JobManageModel}, false, upgrades.Controller)
	check([]multiwatcher.MachineJob{multiwatcher.JobManageModel}, true,
		upgrades.Controller, upgrades.DatabaseMaster)
	check([]multiwatcher.MachineJob{multiwatcher.JobManageModel, multiwatcher.JobHostUnits}, false,
		upgrades.Controller, upgrades.HostMachine)
	check([]multiwatcher.MachineJob{multiwatcher.JobManageModel, multiwatcher.JobHostUnits}, true,
		upgrades.Controller, upgrades.DatabaseMaster, upgrades.HostMachine)
}

func (s *UpgradeSuite) TestPreUpgradeFail(c *gc.C) {
	s.preUpgradeError = true
	s.captureLogs(c)

	workerErr, config, statusCalls, doneLock := s.runUpgradeWorker(c, multiwatcher.JobHostUnits)

	c.Check(workerErr, jc.ErrorIsNil)
	c.Check(config.Version, gc.Equals, s.oldVersion.Number) // Upgrade didn't finish
	c.Assert(doneLock.IsUnlocked(), jc.IsFalse)

	causeMessage := `machine 0 cannot be upgraded: preupgrade error`
	failMessage := fmt.Sprintf(
		`upgrade from %s to %s for "machine-0" failed \(giving up\): %s`,
		s.oldVersion.Number, jujuversion.Current, causeMessage)
	c.Assert(s.logWriter.Log(), jc.LogMatches, []jc.SimpleMessage{
		{loggo.INFO, "checking that upgrade can proceed"},
		{loggo.ERROR, failMessage},
	})

	statusMessage := fmt.Sprintf(
		`upgrade to %s failed (giving up): %s`, jujuversion.Current, causeMessage)
	c.Assert(statusCalls, jc.DeepEquals, []StatusCall{{
		status.Error, statusMessage,
	}})
}

// runUpgradeWorker runs just the upgradesteps worker with a fake
// machine agent and fake agent config, returning the worker's exit
// error, the final agent config, any status calls made, and the
// upgrade completion lock.
func (s *UpgradeSuite) runUpgradeWorker(c *gc.C, jobs ...multiwatcher.MachineJob) (
	error, *fakeConfigSetter, []StatusCall, gate.Lock,
) {
	s.setInstantRetryStrategy(c)
	config := s.makeFakeConfig()
	agent := NewFakeAgent(config)
	doneLock := NewLock(config)
	machineStatus := &testStatusSetter{}
	worker, err := NewWorker(
		doneLock,
		agent,
		nil,
		jobs,
		s.openStateForUpgrade,
		s.preUpgradeSteps,
		machineStatus,
		func(environs.OpenParams) (environs.Environ, error) {
			return nil, errors.NotImplementedf("NewEnviron")
		},
	)
	c.Assert(err, jc.ErrorIsNil)
	return worker.Wait(), config, machineStatus.Calls, doneLock
}

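// openStateForUpgrade opens a state pool against the suite's test
// Mongo instance, standing in for the machine agent's real
// implementation.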
func (s *UpgradeSuite) openStateForUpgrade() (*state.StatePool, error) {
	newPolicy := stateenvirons.GetNewPolicyFunc()
	pool, err := state.OpenStatePool(state.OpenParams{
		Clock:              clock.WallClock,
		ControllerTag:      s.State.ControllerTag(),
		ControllerModelTag: s.Model.ModelTag(),
		MongoSession:       s.State.MongoSession(),
		NewPolicy:          newPolicy,
	})
	if err != nil {
		return nil, err
	}
	return pool, nil
}

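// preUpgradeSteps stands in for the agent's pre-upgrade checks,
// failing whenever the test has set preUpgradeError.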
func (s *UpgradeSuite) preUpgradeSteps(pool *state.StatePool, agentConf agent.Config, isController, isMasterController bool) error {
	if s.preUpgradeError {
		return errors.New("preupgrade error")
	}
	return nil
}

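// makeFakeConfig returns an agent config whose upgradedToVersion
// reports the old (pre-upgrade) version.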
func (s *UpgradeSuite) makeFakeConfig() *fakeConfigSetter {
	return NewFakeConfigSetter(names.NewMachineTag("0"), s.oldVersion.Number)
}

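// create3Controllers enables HA with three controller machines,
// marking each as provisioned and alive, and returns their machine
// ids.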
func (s *UpgradeSuite) create3Controllers(c *gc.C) (machineIdA, machineIdB, machineIdC string) {
	machine0 := s.Factory.MakeMachine(c, &factory.MachineParams{
		Jobs: []state.MachineJob{state.JobManageModel},
	})
	machineIdA = machine0.Id()
	s.setMachineAlive(c, machineIdA)

	changes, err := s.State.EnableHA(3, constraints.Value{}, "quantal", nil)
	c.Assert(err, jc.ErrorIsNil)
	c.Assert(len(changes.Added), gc.Equals, 2)

	machineIdB = changes.Added[0]
	s.setMachineProvisioned(c, machineIdB)
	s.setMachineAlive(c, machineIdB)

	machineIdC = changes.Added[1]
	s.setMachineProvisioned(c, machineIdC)
	s.setMachineAlive(c, machineIdC)

	return
}

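// setMachineProvisioned records a fake instance id against the machine
// so that it counts as provisioned.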
func (s *UpgradeSuite) setMachineProvisioned(c *gc.C, id string) {
	machine, err := s.State.Machine(id)
	c.Assert(err, jc.ErrorIsNil)
	err = machine.SetProvisioned(instance.Id(id+"-inst"), "", "nonce", nil)
	c.Assert(err, jc.ErrorIsNil)
}

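// setMachineAlive starts an agent presence pinger for the machine and
// stops it again when the test completes.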
func (s *UpgradeSuite) setMachineAlive(c *gc.C, id string) {
	machine, err := s.State.Machine(id)
	c.Assert(err, jc.ErrorIsNil)
	pinger, err := machine.SetAgentPresence()
	c.Assert(err, jc.ErrorIsNil)
	s.AddCleanup(func(c *gc.C) {
		c.Assert(worker.Stop(pinger), jc.ErrorIsNil)
	})
}

const maxUpgradeRetries = 3

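// setInstantRetryStrategy patches the upgrade retry strategy so that
// retries happen immediately, keeping the failure tests fast.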
func (s *UpgradeSuite) setInstantRetryStrategy(c *gc.C) {
	// TODO(katco): 2016-08-09: lp:1611427
	s.PatchValue(&getUpgradeRetryStrategy, func() utils.AttemptStrategy {
		c.Logf("setting instant retry strategy for upgrade: retries=%d", maxUpgradeRetries)
		return utils.AttemptStrategy{
			Delay: 0,
			Min:   maxUpgradeRetries,
		}
	})
}

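// makeExpectedStatusCalls builds the sequence of machine status updates
// the worker should make: one "started" call announcing the upgrade,
// one "will retry" error per retry, and then either a final "giving up"
// error or, on success, a started status with an empty message.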
func (s *UpgradeSuite) makeExpectedStatusCalls(retryCount int, expectFail bool, failReason string) []StatusCall {
	calls := []StatusCall{{
		status.Started,
		fmt.Sprintf("upgrading to %s", jujuversion.Current),
	}}
	for i := 0; i < retryCount; i++ {
		calls = append(calls, StatusCall{
			status.Error,
			fmt.Sprintf("upgrade to %s failed (will retry): %s", jujuversion.Current, failReason),
		})
	}
	if expectFail {
		calls = append(calls, StatusCall{
			status.Error,
			fmt.Sprintf("upgrade to %s failed (giving up): %s", jujuversion.Current, failReason),
		})
	} else {
		calls = append(calls, StatusCall{status.Started, ""})
	}
	return calls
}

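// makeExpectedUpgradeLogs builds the log messages the worker should
// emit for the given target, retry count, and outcome. The returned
// messages are regular expressions, matched with jc.LogMatches.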
func (s *UpgradeSuite) makeExpectedUpgradeLogs(retryCount int, target string, expectFail bool, failReason string) []jc.SimpleMessage {
	outLogs := []jc.SimpleMessage{}

	if target == "databaseMaster" || target == "controller" {
		outLogs = append(outLogs, jc.SimpleMessage{
			loggo.INFO, "waiting for other controllers to be ready for upgrade",
		})
		var waitMsg string
		switch target {
		case "databaseMaster":
			waitMsg = "all controllers are ready to run upgrade steps"
		case "controller":
			waitMsg = "the master has completed its upgrade steps"
		}
		outLogs = append(outLogs, jc.SimpleMessage{loggo.INFO, "finished waiting - " + waitMsg})
	}

	outLogs = append(outLogs, jc.SimpleMessage{
		loggo.INFO, fmt.Sprintf(
			`starting upgrade from %s to %s for "machine-0"`,
			s.oldVersion.Number, jujuversion.Current),
	})

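	// The doubled %% leaves a literal %s in the message after this
	// Sprintf; it is filled in below with "will retry" or "giving up".
	// The parentheses are escaped because the expected messages are
	// treated as regular expressions.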
	failMessage := fmt.Sprintf(
		`upgrade from %s to %s for "machine-0" failed \(%%s\): %s`,
		s.oldVersion.Number, jujuversion.Current, failReason)

	for i := 0; i < retryCount; i++ {
		outLogs = append(outLogs, jc.SimpleMessage{loggo.ERROR, fmt.Sprintf(failMessage, "will retry")})
	}
	if expectFail {
		outLogs = append(outLogs, jc.SimpleMessage{loggo.ERROR, fmt.Sprintf(failMessage, "giving up")})
	} else {
		outLogs = append(outLogs, jc.SimpleMessage{loggo.INFO,
			fmt.Sprintf(`upgrade to %s completed successfully.`, jujuversion.Current)})
	}
	return outLogs
}

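// assertEnvironAgentVersion checks the agent-version recorded in the
// model config.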
func (s *UpgradeSuite) assertEnvironAgentVersion(c *gc.C, expected version.Number) {
	envConfig, err := s.Model.ModelConfig()
	c.Assert(err, jc.ErrorIsNil)
	agentVersion, ok := envConfig.AgentVersion()
	c.Assert(ok, jc.IsTrue)
	c.Assert(agentVersion, gc.Equals, expected)
}

// NewFakeConfigSetter returns a fakeConfigSetter which implements
// just enough of the agent.ConfigSetter interface to keep the upgrade
// steps worker happy.
func NewFakeConfigSetter(agentTag names.Tag, initialVersion version.Number) *fakeConfigSetter {
	return &fakeConfigSetter{
		AgentTag: agentTag,
		Version:  initialVersion,
	}
}

type fakeConfigSetter struct {
	agent.ConfigSetter
	AgentTag names.Tag
	Version  version.Number
}

func (s *fakeConfigSetter) Tag() names.Tag {
	return s.AgentTag
}

func (s *fakeConfigSetter) UpgradedToVersion() version.Number {
	return s.Version
}

func (s *fakeConfigSetter) SetUpgradedToVersion(newVersion version.Number) {
	s.Version = newVersion
}

// NewFakeAgent returns a fakeAgent which implements the agent.Agent
// interface. This provides enough MachineAgent functionality to
// support upgrades.
func NewFakeAgent(confSetter agent.ConfigSetter) *fakeAgent {
	return &fakeAgent{
		config: confSetter,
	}
}

type fakeAgent struct {
	config agent.ConfigSetter
}

func (a *fakeAgent) CurrentConfig() agent.Config {
	return a.config
}

func (a *fakeAgent) ChangeConfig(mutate agent.ConfigMutator) error {
	return mutate(a.config)
}

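// StatusCall records a single SetStatus invocation.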
type StatusCall struct {
	Status status.Status
	Info   string
}

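// testStatusSetter records status updates so that tests can assert on
// them.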
type testStatusSetter struct {
	Calls []StatusCall
}

func (s *testStatusSetter) SetStatus(status status.Status, info string, _ map[string]interface{}) error {
	s.Calls = append(s.Calls, StatusCall{status, info})
	return nil
}