github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/featuretests/upgrade_test.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // These tests check aspects of upgrade behaviour of the machine agent
     5  // as a whole.
     6  
     7  package featuretests
     8  
     9  import (
    10  	"strings"
    11  	"time"
    12  
    13  	jc "github.com/juju/testing/checkers"
    14  	"github.com/juju/utils"
    15  	"github.com/juju/utils/arch"
    16  	pacman "github.com/juju/utils/packaging/manager"
    17  	"github.com/juju/utils/series"
    18  	gc "gopkg.in/check.v1"
    19  	"gopkg.in/juju/names.v2"
    20  
    21  	"github.com/juju/juju/agent"
    22  	"github.com/juju/juju/api"
    23  	"github.com/juju/juju/apiserver/params"
    24  	agentcmd "github.com/juju/juju/cmd/jujud/agent"
    25  	"github.com/juju/juju/cmd/jujud/agent/agenttest"
    26  	cmdutil "github.com/juju/juju/cmd/jujud/util"
    27  	"github.com/juju/juju/constraints"
    28  	envtesting "github.com/juju/juju/environs/testing"
    29  	jujutesting "github.com/juju/juju/juju/testing"
    30  	"github.com/juju/juju/mongo"
    31  	"github.com/juju/juju/state"
    32  	"github.com/juju/juju/state/watcher"
    33  	coretesting "github.com/juju/juju/testing"
    34  	"github.com/juju/juju/testing/factory"
    35  	"github.com/juju/juju/tools"
    36  	"github.com/juju/juju/upgrades"
    37  	jujuversion "github.com/juju/juju/version"
    38  	"github.com/juju/juju/worker/upgrader"
    39  	"github.com/juju/juju/worker/upgradesteps"
    40  	"github.com/juju/version"
    41  )
    42  
    43  const (
    44  	FullAPIExposed       = true
    45  	RestrictedAPIExposed = false
    46  )
    47  
    48  // TODO(katco): 2016-08-09: lp:1611427
    49  var ShortAttempt = &utils.AttemptStrategy{
    50  	Total: time.Second * 10,
    51  	Delay: time.Millisecond * 200,
    52  }
    53  
    54  type upgradeSuite struct {
    55  	agenttest.AgentSuite
    56  	oldVersion version.Binary
    57  }
    58  
    59  func (s *upgradeSuite) SetUpTest(c *gc.C) {
    60  	s.AgentSuite.SetUpTest(c)
    61  
    62  	s.oldVersion = version.Binary{
    63  		Number: jujuversion.Current,
    64  		Arch:   arch.HostArch(),
    65  		Series: series.HostSeries(),
    66  	}
    67  	s.oldVersion.Major = 1
    68  	s.oldVersion.Minor = 16
    69  
    70  	// Don't wait so long in tests.
    71  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutMaster, time.Duration(time.Millisecond*50))
    72  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutSecondary, time.Duration(time.Millisecond*60))
    73  
    74  	// Ensure we don't fail disk space check.
    75  	s.PatchValue(&upgrades.MinDiskSpaceMib, uint64(0))
    76  
    77  	// Consume apt-get commands that get run before upgrades.
    78  	aptCmds := s.AgentSuite.HookCommandOutput(&pacman.CommandOutput, nil, nil)
    79  	go func() {
    80  		for _ = range aptCmds {
    81  		}
    82  	}()
    83  
    84  	// TODO(mjs) - the following should maybe be part of AgentSuite.SetUpTest()
    85  	s.PatchValue(&cmdutil.EnsureMongoServer, func(mongo.EnsureServerParams) error {
    86  		return nil
    87  	})
    88  	s.PatchValue(&agentcmd.ProductionMongoWriteConcern, false)
    89  
    90  }
    91  
    92  func (s *upgradeSuite) TestLoginsDuringUpgrade(c *gc.C) {
    93  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
    94  
    95  	// Create machine agent to upgrade
    96  	machine, machine0Conf := s.makeStateAgentVersion(c, s.oldVersion)
    97  
    98  	// Set up a second machine to log in as. API logins are tested
    99  	// manually so there's no need to actually start this machine.
   100  	machine1, password := s.Factory.MakeMachineReturningPassword(c, &factory.MachineParams{
   101  		Nonce: agent.BootstrapNonce,
   102  	})
   103  	machine1Conf, _ := s.PrimeAgent(c, machine1.Tag(), password)
   104  
   105  	// Mock out upgrade logic, using a channel so that the test knows
   106  	// when upgrades have started and can control when upgrades
   107  	// should finish.
   108  	upgradeCh := make(chan bool)
   109  	abort := make(chan bool)
   110  	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
   111  		// Signal that upgrade has started.
   112  		select {
   113  		case upgradeCh <- true:
   114  		case <-abort:
   115  			return nil
   116  		}
   117  
   118  		// Wait for signal that upgrades should finish.
   119  		select {
   120  		case <-upgradeCh:
   121  		case <-abort:
   122  			return nil
   123  		}
   124  		return nil
   125  	}
   126  	s.PatchValue(&upgradesteps.PerformUpgrade, fakePerformUpgrade)
   127  
   128  	a := s.newAgent(c, machine)
   129  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   130  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   131  
   132  	c.Assert(waitForUpgradeToStart(upgradeCh), jc.IsTrue)
   133  
   134  	// Only user and local logins are allowed during upgrade. Users get a restricted API.
   135  	s.checkLoginToAPIAsUser(c, machine0Conf, RestrictedAPIExposed)
   136  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   137  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsFalse)
   138  
   139  	close(upgradeCh) // Allow upgrade to complete
   140  
   141  	waitForUpgradeToFinish(c, machine0Conf)
   142  
   143  	// All logins are allowed after upgrade
   144  	s.checkLoginToAPIAsUser(c, machine0Conf, FullAPIExposed)
   145  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   146  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsTrue)
   147  }
   148  
   149  func (s *upgradeSuite) TestDowngradeOnMasterWhenOtherControllerDoesntStartUpgrade(c *gc.C) {
   150  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
   151  
   152  	// This test checks that the master triggers a downgrade if one of
   153  	// the other controller fails to signal it is ready for upgrade.
   154  	//
   155  	// This test is functional, ensuring that the upgrader worker
   156  	// terminates the machine agent with the UpgradeReadyError which
   157  	// makes the downgrade happen.
   158  
   159  	// Speed up the watcher frequency to make the test much faster.
   160  	s.PatchValue(&watcher.Period, 200*time.Millisecond)
   161  
   162  	// Provide (fake) tools so that the upgrader has something to downgrade to.
   163  	envtesting.AssertUploadFakeToolsVersions(
   164  		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), s.oldVersion)
   165  
   166  	// Create 3 controllers
   167  	machineA, _ := s.makeStateAgentVersion(c, s.oldVersion)
   168  	changes, err := s.State.EnableHA(3, constraints.Value{}, "quantal", nil)
   169  	c.Assert(err, jc.ErrorIsNil)
   170  	c.Assert(len(changes.Added), gc.Equals, 2)
   171  	machineB, _, _ := s.configureMachine(c, changes.Added[0], s.oldVersion)
   172  	s.configureMachine(c, changes.Added[1], s.oldVersion)
   173  
   174  	// One of the other controllers is ready for upgrade (but machine C isn't).
   175  	info, err := s.State.EnsureUpgradeInfo(machineB.Id(), s.oldVersion.Number, jujuversion.Current)
   176  	c.Assert(err, jc.ErrorIsNil)
   177  
   178  	// Ensure the agent will think it's the master controller.
   179  	fakeIsMachineMaster := func(*state.State, string) (bool, error) {
   180  		return true, nil
   181  	}
   182  	s.PatchValue(&upgradesteps.IsMachineMaster, fakeIsMachineMaster)
   183  
   184  	// Start the agent
   185  	agent := s.newAgent(c, machineA)
   186  	defer agent.Stop()
   187  	agentDone := make(chan error)
   188  	go func() {
   189  		agentDone <- agent.Run(nil)
   190  	}()
   191  
   192  	select {
   193  	case agentErr := <-agentDone:
   194  		upgradeReadyErr, ok := agentErr.(*upgrader.UpgradeReadyError)
   195  		if !ok {
   196  			c.Fatalf("didn't see UpgradeReadyError, instead got: %v", agentErr)
   197  		}
   198  		// Confirm that the downgrade is back to the previous version.
   199  		current := version.Binary{
   200  			Number: jujuversion.Current,
   201  			Arch:   arch.HostArch(),
   202  			Series: series.HostSeries(),
   203  		}
   204  		c.Assert(upgradeReadyErr.OldTools, gc.Equals, current)
   205  		c.Assert(upgradeReadyErr.NewTools, gc.Equals, s.oldVersion)
   206  
   207  	case <-time.After(coretesting.LongWait):
   208  		c.Fatal("machine agent did not exit as expected")
   209  	}
   210  
   211  	// UpgradeInfo doc should now be archived.
   212  	err = info.Refresh()
   213  	c.Assert(err, gc.ErrorMatches, "current upgrade info not found")
   214  }
   215  
   216  // TODO(mjs) - the following should maybe be part of AgentSuite
   217  func (s *upgradeSuite) newAgent(c *gc.C, m *state.Machine) *agentcmd.MachineAgent {
   218  	agentConf := agentcmd.NewAgentConf(s.DataDir())
   219  	agentConf.ReadConfig(m.Tag().String())
   220  	machineAgentFactory := agentcmd.MachineAgentFactoryFn(agentConf, nil, c.MkDir())
   221  	return machineAgentFactory(m.Id())
   222  }
   223  
   224  // TODO(mjs) - the following should maybe be part of AgentSuite
   225  func (s *upgradeSuite) makeStateAgentVersion(c *gc.C, vers version.Binary) (*state.Machine, agent.ConfigSetterWriter) {
   226  	machine := s.Factory.MakeMachine(c, &factory.MachineParams{
   227  		Jobs:  []state.MachineJob{state.JobManageModel},
   228  		Nonce: agent.BootstrapNonce,
   229  	})
   230  	_, config, _ := s.configureMachine(c, machine.Id(), vers)
   231  	return machine, config
   232  }
   233  
   234  const initialMachinePassword = "machine-password-1234567890"
   235  
   236  // TODO(mjs) - the following should maybe be part of AgentSuite
   237  func (s *upgradeSuite) configureMachine(c *gc.C, machineId string, vers version.Binary) (
   238  	machine *state.Machine, agentConfig agent.ConfigSetterWriter, tools *tools.Tools,
   239  ) {
   240  	m, err := s.State.Machine(machineId)
   241  	c.Assert(err, jc.ErrorIsNil)
   242  
   243  	// Provision the machine if it isn't already
   244  	if _, err := m.InstanceId(); err != nil {
   245  		inst, md := jujutesting.AssertStartInstance(c, s.Environ, s.ControllerConfig.ControllerUUID(), machineId)
   246  		c.Assert(m.SetProvisioned(inst.Id(), agent.BootstrapNonce, md), jc.ErrorIsNil)
   247  	}
   248  
   249  	// Make the machine live
   250  	pinger, err := m.SetAgentPresence()
   251  	c.Assert(err, jc.ErrorIsNil)
   252  	s.AddCleanup(func(c *gc.C) { pinger.Stop() })
   253  
   254  	// Set up the new machine.
   255  	err = m.SetAgentVersion(vers)
   256  	c.Assert(err, jc.ErrorIsNil)
   257  	err = m.SetPassword(initialMachinePassword)
   258  	c.Assert(err, jc.ErrorIsNil)
   259  	tag := m.Tag()
   260  	if m.IsManager() {
   261  		err = m.SetMongoPassword(initialMachinePassword)
   262  		c.Assert(err, jc.ErrorIsNil)
   263  		agentConfig, tools = s.PrimeStateAgentVersion(c, tag, initialMachinePassword, vers)
   264  		info, ok := agentConfig.StateServingInfo()
   265  		c.Assert(ok, jc.IsTrue)
   266  		ssi := cmdutil.ParamsStateServingInfoToStateStateServingInfo(info)
   267  		err = s.State.SetStateServingInfo(ssi)
   268  		c.Assert(err, jc.ErrorIsNil)
   269  	} else {
   270  		agentConfig, tools = s.PrimeAgentVersion(c, tag, initialMachinePassword, vers)
   271  	}
   272  	err = agentConfig.Write()
   273  	c.Assert(err, jc.ErrorIsNil)
   274  	return m, agentConfig, tools
   275  }
   276  
   277  func canLoginToAPIAsMachine(c *gc.C, fromConf, toConf agent.Config) bool {
   278  	fromInfo, ok := fromConf.APIInfo()
   279  	c.Assert(ok, jc.IsTrue)
   280  	toInfo, ok := toConf.APIInfo()
   281  	c.Assert(ok, jc.IsTrue)
   282  	fromInfo.Addrs = toInfo.Addrs
   283  	var err error
   284  	var apiState api.Connection
   285  	for a := ShortAttempt.Start(); a.Next(); {
   286  		apiState, err = api.Open(fromInfo, upgradeTestDialOpts)
   287  		// If space discovery is still in progress we retry.
   288  		if err != nil && strings.Contains(err.Error(), "spaces are still being discovered") {
   289  			if !a.HasNext() {
   290  				return false
   291  			}
   292  			continue
   293  		}
   294  		if apiState != nil {
   295  			apiState.Close()
   296  		}
   297  		break
   298  	}
   299  	return apiState != nil && err == nil
   300  }
   301  
   302  func (s *upgradeSuite) checkLoginToAPIAsUser(c *gc.C, conf agent.Config, expectFullAPI bool) {
   303  	var err error
   304  	// Multiple attempts may be necessary because there is a small gap
   305  	// between the post-upgrade version being written to the agent's
   306  	// config (as observed by waitForUpgradeToFinish) and the end of
   307  	// "upgrade mode" (i.e. when the agent's UpgradeComplete channel
   308  	// is closed). Without this tests that call checkLoginToAPIAsUser
   309  	// can occasionally fail.
   310  	for a := coretesting.LongAttempt.Start(); a.Next(); {
   311  		err = s.attemptRestrictedAPIAsUser(c, conf)
   312  		switch expectFullAPI {
   313  		case FullAPIExposed:
   314  			if err == nil {
   315  				return
   316  			}
   317  		case RestrictedAPIExposed:
   318  			if params.IsCodeUpgradeInProgress(err) {
   319  				return
   320  			}
   321  		}
   322  	}
   323  	c.Fatalf("timed out waiting for expected API behaviour. last error was: %v", err)
   324  }
   325  
   326  func (s *upgradeSuite) attemptRestrictedAPIAsUser(c *gc.C, conf agent.Config) error {
   327  	info, ok := conf.APIInfo()
   328  	c.Assert(ok, jc.IsTrue)
   329  	info.Tag = s.AdminUserTag(c)
   330  	info.Password = "dummy-secret"
   331  	info.Nonce = ""
   332  
   333  	apiState, err := api.Open(info, upgradeTestDialOpts)
   334  	if err != nil {
   335  		// If space discovery is in progress we'll get an error here
   336  		// and need to retry.
   337  		return err
   338  	}
   339  	defer apiState.Close()
   340  
   341  	// this call should always work
   342  	var result params.FullStatus
   343  	err = apiState.APICall("Client", 1, "", "FullStatus", nil, &result)
   344  	c.Assert(err, jc.ErrorIsNil)
   345  
   346  	// this call should only work if API is not restricted
   347  	return apiState.APICall("Client", 1, "", "WatchAll", nil, nil)
   348  }
   349  
   350  var upgradeTestDialOpts = api.DialOpts{
   351  	Timeout:             2 * time.Minute,
   352  	RetryDelay:          250 * time.Millisecond,
   353  	DialAddressInterval: 50 * time.Millisecond,
   354  }
   355  
   356  func waitForUpgradeToStart(upgradeCh chan bool) bool {
   357  	select {
   358  	case <-upgradeCh:
   359  		return true
   360  	case <-time.After(coretesting.LongWait):
   361  		return false
   362  	}
   363  }
   364  
   365  func waitForUpgradeToFinish(c *gc.C, conf agent.Config) {
   366  	success := false
   367  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   368  		diskConf := readConfigFromDisk(c, conf.DataDir(), conf.Tag())
   369  		success = diskConf.UpgradedToVersion() == jujuversion.Current
   370  		if success {
   371  			break
   372  		}
   373  	}
   374  	c.Assert(success, jc.IsTrue)
   375  }
   376  
   377  func readConfigFromDisk(c *gc.C, dir string, tag names.Tag) agent.Config {
   378  	conf, err := agent.ReadConfig(agent.ConfigPath(dir, tag))
   379  	c.Assert(err, jc.ErrorIsNil)
   380  	return conf
   381  }