github.com/mwhudson/juju@v0.0.0-20160512215208-90ff01f3497f/featuretests/upgrade_test.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // These tests check aspects of upgrade behaviour of the machine agent
     5  // as a whole.
     6  
     7  package featuretests
     8  
     9  import (
    10  	"reflect"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/juju/errors"
    15  	"github.com/juju/names"
    16  	jc "github.com/juju/testing/checkers"
    17  	"github.com/juju/utils"
    18  	"github.com/juju/utils/arch"
    19  	pacman "github.com/juju/utils/packaging/manager"
    20  	"github.com/juju/utils/series"
    21  	gc "gopkg.in/check.v1"
    22  
    23  	"github.com/juju/juju/agent"
    24  	"github.com/juju/juju/api"
    25  	"github.com/juju/juju/apiserver/params"
    26  	agentcmd "github.com/juju/juju/cmd/jujud/agent"
    27  	agenttesting "github.com/juju/juju/cmd/jujud/agent/testing"
    28  	cmdutil "github.com/juju/juju/cmd/jujud/util"
    29  	"github.com/juju/juju/constraints"
    30  	envtesting "github.com/juju/juju/environs/testing"
    31  	jujutesting "github.com/juju/juju/juju/testing"
    32  	"github.com/juju/juju/mongo"
    33  	"github.com/juju/juju/rpc"
    34  	"github.com/juju/juju/state"
    35  	"github.com/juju/juju/state/watcher"
    36  	coretesting "github.com/juju/juju/testing"
    37  	"github.com/juju/juju/testing/factory"
    38  	"github.com/juju/juju/tools"
    39  	"github.com/juju/juju/upgrades"
    40  	jujuversion "github.com/juju/juju/version"
    41  	"github.com/juju/juju/worker/upgrader"
    42  	"github.com/juju/juju/worker/upgradesteps"
    43  	"github.com/juju/version"
    44  )
    45  
// Values for the expectFullApi argument of checkLoginToAPIAsUser.
const (
	// FullAPIExposed means the login is expected to be able to make an
	// API call that is only permitted when the API is not restricted.
	FullAPIExposed       = true
	// RestrictedAPIExposed means the login is expected to have that call
	// rejected with an "upgrade in progress" error.
	RestrictedAPIExposed = false
)
    50  
    51  var ShortAttempt = &utils.AttemptStrategy{
    52  	Total: time.Second * 10,
    53  	Delay: time.Millisecond * 200,
    54  }
    55  
// upgradeSuite holds the machine agent upgrade feature tests. It builds on
// AgentSuite for its state, environment and agent configuration helpers.
type upgradeSuite struct {
	agenttesting.AgentSuite
	// oldVersion is the agent version the tests upgrade from. SetUpTest
	// sets it to the current version with major.minor forced to 1.16.
	oldVersion version.Binary
}
    60  
    61  func (s *upgradeSuite) SetUpTest(c *gc.C) {
    62  	s.AgentSuite.SetUpTest(c)
    63  
    64  	s.oldVersion = version.Binary{
    65  		Number: jujuversion.Current,
    66  		Arch:   arch.HostArch(),
    67  		Series: series.HostSeries(),
    68  	}
    69  	s.oldVersion.Major = 1
    70  	s.oldVersion.Minor = 16
    71  
    72  	// Don't wait so long in tests.
    73  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutMaster, time.Duration(time.Millisecond*50))
    74  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutSecondary, time.Duration(time.Millisecond*60))
    75  
    76  	// Ensure we don't fail disk space check.
    77  	s.PatchValue(&upgrades.MinDiskSpaceMib, uint64(0))
    78  
    79  	// Consume apt-get commands that get run before upgrades.
    80  	aptCmds := s.AgentSuite.HookCommandOutput(&pacman.CommandOutput, nil, nil)
    81  	go func() {
    82  		for _ = range aptCmds {
    83  		}
    84  	}()
    85  
    86  	// TODO(mjs) - the following should maybe be part of AgentSuite.SetUpTest()
    87  	s.PatchValue(&cmdutil.EnsureMongoServer, func(mongo.EnsureServerParams) error {
    88  		return nil
    89  	})
    90  	s.PatchValue(&agentcmd.ProductionMongoWriteConcern, false)
    91  
    92  }
    93  
    94  func (s *upgradeSuite) TestLoginsDuringUpgrade(c *gc.C) {
    95  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
    96  
    97  	// Create machine agent to upgrade
    98  	machine, machine0Conf := s.makeStateAgentVersion(c, s.oldVersion)
    99  
   100  	// Set up a second machine to log in as. API logins are tested
   101  	// manually so there's no need to actually start this machine.
   102  	machine1, password := s.Factory.MakeMachineReturningPassword(c, &factory.MachineParams{
   103  		Nonce: agent.BootstrapNonce,
   104  	})
   105  	machine1Conf, _ := s.PrimeAgent(c, machine1.Tag(), password)
   106  
   107  	// Mock out upgrade logic, using a channel so that the test knows
   108  	// when upgrades have started and can control when upgrades
   109  	// should finish.
   110  	upgradeCh := make(chan bool)
   111  	abort := make(chan bool)
   112  	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
   113  		// Signal that upgrade has started.
   114  		select {
   115  		case upgradeCh <- true:
   116  		case <-abort:
   117  			return nil
   118  		}
   119  
   120  		// Wait for signal that upgrades should finish.
   121  		select {
   122  		case <-upgradeCh:
   123  		case <-abort:
   124  			return nil
   125  		}
   126  		return nil
   127  	}
   128  	s.PatchValue(&upgradesteps.PerformUpgrade, fakePerformUpgrade)
   129  
   130  	a := s.newAgent(c, machine)
   131  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   132  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   133  
   134  	c.Assert(waitForUpgradeToStart(upgradeCh), jc.IsTrue)
   135  
   136  	// Only user and local logins are allowed during upgrade. Users get a restricted API.
   137  	s.checkLoginToAPIAsUser(c, machine0Conf, RestrictedAPIExposed)
   138  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   139  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsFalse)
   140  
   141  	close(upgradeCh) // Allow upgrade to complete
   142  
   143  	waitForUpgradeToFinish(c, machine0Conf)
   144  
   145  	// All logins are allowed after upgrade
   146  	s.checkLoginToAPIAsUser(c, machine0Conf, FullAPIExposed)
   147  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   148  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsTrue)
   149  }
   150  
   151  func (s *upgradeSuite) TestDowngradeOnMasterWhenOtherControllerDoesntStartUpgrade(c *gc.C) {
   152  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
   153  
   154  	// This test checks that the master triggers a downgrade if one of
   155  	// the other controller fails to signal it is ready for upgrade.
   156  	//
   157  	// This test is functional, ensuring that the upgrader worker
   158  	// terminates the machine agent with the UpgradeReadyError which
   159  	// makes the downgrade happen.
   160  
   161  	// Speed up the watcher frequency to make the test much faster.
   162  	s.PatchValue(&watcher.Period, 200*time.Millisecond)
   163  
   164  	// Provide (fake) tools so that the upgrader has something to downgrade to.
   165  	envtesting.AssertUploadFakeToolsVersions(
   166  		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), s.oldVersion)
   167  
   168  	// Create 3 controllers
   169  	machineA, _ := s.makeStateAgentVersion(c, s.oldVersion)
   170  	changes, err := s.State.EnableHA(3, constraints.Value{}, "quantal", nil)
   171  	c.Assert(err, jc.ErrorIsNil)
   172  	c.Assert(len(changes.Added), gc.Equals, 2)
   173  	machineB, _, _ := s.configureMachine(c, changes.Added[0], s.oldVersion)
   174  	s.configureMachine(c, changes.Added[1], s.oldVersion)
   175  
   176  	// One of the other controllers is ready for upgrade (but machine C isn't).
   177  	info, err := s.State.EnsureUpgradeInfo(machineB.Id(), s.oldVersion.Number, jujuversion.Current)
   178  	c.Assert(err, jc.ErrorIsNil)
   179  
   180  	// Ensure the agent will think it's the master controller.
   181  	fakeIsMachineMaster := func(*state.State, string) (bool, error) {
   182  		return true, nil
   183  	}
   184  	s.PatchValue(&upgradesteps.IsMachineMaster, fakeIsMachineMaster)
   185  
   186  	// Start the agent
   187  	agent := s.newAgent(c, machineA)
   188  	defer agent.Stop()
   189  	agentDone := make(chan error)
   190  	go func() {
   191  		agentDone <- agent.Run(nil)
   192  	}()
   193  
   194  	select {
   195  	case agentErr := <-agentDone:
   196  		upgradeReadyErr, ok := agentErr.(*upgrader.UpgradeReadyError)
   197  		if !ok {
   198  			c.Fatalf("didn't see UpgradeReadyError, instead got: %v", agentErr)
   199  		}
   200  		// Confirm that the downgrade is back to the previous version.
   201  		current := version.Binary{
   202  			Number: jujuversion.Current,
   203  			Arch:   arch.HostArch(),
   204  			Series: series.HostSeries(),
   205  		}
   206  		c.Assert(upgradeReadyErr.OldTools, gc.Equals, current)
   207  		c.Assert(upgradeReadyErr.NewTools, gc.Equals, s.oldVersion)
   208  
   209  	case <-time.After(coretesting.LongWait):
   210  		c.Fatal("machine agent did not exit as expected")
   211  	}
   212  
   213  	// UpgradeInfo doc should now be archived.
   214  	err = info.Refresh()
   215  	c.Assert(err, gc.ErrorMatches, "current upgrade info not found")
   216  }
   217  
   218  // TODO(mjs) - the following should maybe be part of AgentSuite
   219  func (s *upgradeSuite) newAgent(c *gc.C, m *state.Machine) *agentcmd.MachineAgent {
   220  	agentConf := agentcmd.NewAgentConf(s.DataDir())
   221  	agentConf.ReadConfig(m.Tag().String())
   222  	machineAgentFactory := agentcmd.MachineAgentFactoryFn(agentConf, nil, c.MkDir())
   223  	return machineAgentFactory(m.Id())
   224  }
   225  
   226  // TODO(mjs) - the following should maybe be part of AgentSuite
   227  func (s *upgradeSuite) makeStateAgentVersion(c *gc.C, vers version.Binary) (*state.Machine, agent.ConfigSetterWriter) {
   228  	machine := s.Factory.MakeMachine(c, &factory.MachineParams{
   229  		Jobs:  []state.MachineJob{state.JobManageModel},
   230  		Nonce: agent.BootstrapNonce,
   231  	})
   232  	_, config, _ := s.configureMachine(c, machine.Id(), vers)
   233  	return machine, config
   234  }
   235  
// initialMachinePassword is the password configureMachine gives to every
// machine it sets up, both in state and in the primed agent config.
const initialMachinePassword = "machine-password-1234567890"
   237  
   238  // TODO(mjs) - the following should maybe be part of AgentSuite
   239  func (s *upgradeSuite) configureMachine(c *gc.C, machineId string, vers version.Binary) (
   240  	machine *state.Machine, agentConfig agent.ConfigSetterWriter, tools *tools.Tools,
   241  ) {
   242  	m, err := s.State.Machine(machineId)
   243  	c.Assert(err, jc.ErrorIsNil)
   244  
   245  	// Provision the machine if it isn't already
   246  	if _, err := m.InstanceId(); err != nil {
   247  		inst, md := jujutesting.AssertStartInstance(c, s.Environ, machineId)
   248  		c.Assert(m.SetProvisioned(inst.Id(), agent.BootstrapNonce, md), jc.ErrorIsNil)
   249  	}
   250  
   251  	// Make the machine live
   252  	pinger, err := m.SetAgentPresence()
   253  	c.Assert(err, jc.ErrorIsNil)
   254  	s.AddCleanup(func(c *gc.C) { pinger.Stop() })
   255  
   256  	// Set up the new machine.
   257  	err = m.SetAgentVersion(vers)
   258  	c.Assert(err, jc.ErrorIsNil)
   259  	err = m.SetPassword(initialMachinePassword)
   260  	c.Assert(err, jc.ErrorIsNil)
   261  	tag := m.Tag()
   262  	if m.IsManager() {
   263  		err = m.SetMongoPassword(initialMachinePassword)
   264  		c.Assert(err, jc.ErrorIsNil)
   265  		agentConfig, tools = s.PrimeStateAgentVersion(c, tag, initialMachinePassword, vers)
   266  		info, ok := agentConfig.StateServingInfo()
   267  		c.Assert(ok, jc.IsTrue)
   268  		ssi := cmdutil.ParamsStateServingInfoToStateStateServingInfo(info)
   269  		err = s.State.SetStateServingInfo(ssi)
   270  		c.Assert(err, jc.ErrorIsNil)
   271  	} else {
   272  		agentConfig, tools = s.PrimeAgentVersion(c, tag, initialMachinePassword, vers)
   273  	}
   274  	err = agentConfig.Write()
   275  	c.Assert(err, jc.ErrorIsNil)
   276  	return m, agentConfig, tools
   277  }
   278  
   279  func canLoginToAPIAsMachine(c *gc.C, fromConf, toConf agent.Config) bool {
   280  	fromInfo, ok := fromConf.APIInfo()
   281  	c.Assert(ok, jc.IsTrue)
   282  	toInfo, ok := toConf.APIInfo()
   283  	c.Assert(ok, jc.IsTrue)
   284  	fromInfo.Addrs = toInfo.Addrs
   285  	var err error
   286  	var apiState api.Connection
   287  	for a := ShortAttempt.Start(); a.Next(); {
   288  		apiState, err = api.Open(fromInfo, upgradeTestDialOpts)
   289  		// If space discovery is still in progress we retry.
   290  		if err != nil && strings.Contains(err.Error(), "spaces are still being discovered") {
   291  			if !a.HasNext() {
   292  				return false
   293  			}
   294  			continue
   295  		}
   296  		if apiState != nil {
   297  			apiState.Close()
   298  		}
   299  		break
   300  	}
   301  	return apiState != nil && err == nil
   302  }
   303  
   304  func (s *upgradeSuite) checkLoginToAPIAsUser(c *gc.C, conf agent.Config, expectFullApi bool) {
   305  	var err error
   306  	// Multiple attempts may be necessary because there is a small gap
   307  	// between the post-upgrade version being written to the agent's
   308  	// config (as observed by waitForUpgradeToFinish) and the end of
   309  	// "upgrade mode" (i.e. when the agent's UpgradeComplete channel
   310  	// is closed). Without this tests that call checkLoginToAPIAsUser
   311  	// can occasionally fail.
   312  	for a := coretesting.LongAttempt.Start(); a.Next(); {
   313  		err = s.attemptRestrictedAPIAsUser(c, conf)
   314  		switch expectFullApi {
   315  		case FullAPIExposed:
   316  			if err == nil {
   317  				return
   318  			}
   319  		case RestrictedAPIExposed:
   320  			if reflect.DeepEqual(errors.Cause(err), &rpc.RequestError{Message: params.CodeUpgradeInProgress, Code: params.CodeUpgradeInProgress}) {
   321  				return
   322  			}
   323  		}
   324  	}
   325  	c.Fatalf("timed out waiting for expected API behaviour. last error was: %v", err)
   326  }
   327  
   328  func (s *upgradeSuite) attemptRestrictedAPIAsUser(c *gc.C, conf agent.Config) error {
   329  	info, ok := conf.APIInfo()
   330  	c.Assert(ok, jc.IsTrue)
   331  	info.Tag = s.AdminUserTag(c)
   332  	info.Password = "dummy-secret"
   333  	info.Nonce = ""
   334  
   335  	apiState, err := api.Open(info, upgradeTestDialOpts)
   336  	if err != nil {
   337  		// If space discovery is in progress we'll get an error here
   338  		// and need to retry.
   339  		return err
   340  	}
   341  	defer apiState.Close()
   342  
   343  	// this call should always work
   344  	var result params.FullStatus
   345  	err = apiState.APICall("Client", 1, "", "FullStatus", nil, &result)
   346  	c.Assert(err, jc.ErrorIsNil)
   347  
   348  	// this call should only work if API is not restricted
   349  	return apiState.APICall("Client", 1, "", "WatchAll", nil, nil)
   350  }
   351  
   352  var upgradeTestDialOpts = api.DialOpts{
   353  	Timeout:             2 * time.Minute,
   354  	RetryDelay:          250 * time.Millisecond,
   355  	DialAddressInterval: 50 * time.Millisecond,
   356  }
   357  
   358  func waitForUpgradeToStart(upgradeCh chan bool) bool {
   359  	select {
   360  	case <-upgradeCh:
   361  		return true
   362  	case <-time.After(coretesting.LongWait):
   363  		return false
   364  	}
   365  }
   366  
   367  func waitForUpgradeToFinish(c *gc.C, conf agent.Config) {
   368  	success := false
   369  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   370  		diskConf := readConfigFromDisk(c, conf.DataDir(), conf.Tag())
   371  		success = diskConf.UpgradedToVersion() == jujuversion.Current
   372  		if success {
   373  			break
   374  		}
   375  	}
   376  	c.Assert(success, jc.IsTrue)
   377  }
   378  
   379  func readConfigFromDisk(c *gc.C, dir string, tag names.Tag) agent.Config {
   380  	conf, err := agent.ReadConfig(agent.ConfigPath(dir, tag))
   381  	c.Assert(err, jc.ErrorIsNil)
   382  	return conf
   383  }