github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/featuretests/upgrade_test.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // These tests check aspects of upgrade behaviour of the machine agent
     5  // as a whole.
     6  
     7  package featuretests
     8  
     9  import (
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/juju/errors"
    14  	"github.com/juju/os/series"
    15  	pacman "github.com/juju/packaging/manager"
    16  	jc "github.com/juju/testing/checkers"
    17  	"github.com/juju/utils"
    18  	"github.com/juju/utils/arch"
    19  	"github.com/juju/version"
    20  	gc "gopkg.in/check.v1"
    21  	"gopkg.in/juju/names.v2"
    22  
    23  	"github.com/juju/juju/agent"
    24  	"github.com/juju/juju/api"
    25  	"github.com/juju/juju/apiserver/params"
    26  	agentcmd "github.com/juju/juju/cmd/jujud/agent"
    27  	"github.com/juju/juju/cmd/jujud/agent/agenttest"
    28  	cmdutil "github.com/juju/juju/cmd/jujud/util"
    29  	"github.com/juju/juju/core/constraints"
    30  	"github.com/juju/juju/environs/context"
    31  	envtesting "github.com/juju/juju/environs/testing"
    32  	jujutesting "github.com/juju/juju/juju/testing"
    33  	"github.com/juju/juju/state"
    34  	"github.com/juju/juju/state/watcher"
    35  	coretesting "github.com/juju/juju/testing"
    36  	"github.com/juju/juju/testing/factory"
    37  	"github.com/juju/juju/tools"
    38  	"github.com/juju/juju/upgrades"
    39  	jujuversion "github.com/juju/juju/version"
    40  	"github.com/juju/juju/worker/logsender"
    41  	"github.com/juju/juju/worker/upgrader"
    42  	"github.com/juju/juju/worker/upgradesteps"
    43  )
    44  
    45  const (
    46  	FullAPIExposed       = true
    47  	RestrictedAPIExposed = false
    48  )
    49  
    50  // TODO(katco): 2016-08-09: lp:1611427
    51  var ShortAttempt = &utils.AttemptStrategy{
    52  	Total: time.Second * 10,
    53  	Delay: time.Millisecond * 200,
    54  }
    55  
// upgradeSuite holds the feature tests for machine agent upgrade
// behaviour. It builds on agenttest.AgentSuite for its fixtures.
type upgradeSuite struct {
	agenttest.AgentSuite
	// oldVersion is the agent version the machines are configured with
	// before the upgrade. SetUpTest derives it from the current version
	// with the major/minor numbers forced to 1.16.
	oldVersion version.Binary
}
    60  
    61  func (s *upgradeSuite) SetUpSuite(c *gc.C) {
    62  	s.AgentSuite.SetUpSuite(c)
    63  	// Speed up the watcher frequency to make the test much faster.
    64  	s.PatchValue(&watcher.Period, 200*time.Millisecond)
    65  
    66  	agenttest.InstallFakeEnsureMongo(s)
    67  	s.PatchValue(&agentcmd.ProductionMongoWriteConcern, false)
    68  }
    69  
    70  func (s *upgradeSuite) SetUpTest(c *gc.C) {
    71  	s.AgentSuite.SetUpTest(c)
    72  
    73  	s.oldVersion = version.Binary{
    74  		Number: jujuversion.Current,
    75  		Arch:   arch.HostArch(),
    76  		Series: series.MustHostSeries(),
    77  	}
    78  	s.oldVersion.Major = 1
    79  	s.oldVersion.Minor = 16
    80  
    81  	// Don't wait so long in tests.
    82  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutMaster, time.Duration(time.Millisecond*50))
    83  	s.PatchValue(&upgradesteps.UpgradeStartTimeoutSecondary, time.Duration(time.Millisecond*60))
    84  
    85  	// Ensure we don't fail disk space check.
    86  	s.PatchValue(&upgrades.MinDiskSpaceMib, uint64(0))
    87  
    88  	// Consume apt-get commands that get run before upgrades.
    89  	aptCmds := s.AgentSuite.HookCommandOutput(&pacman.CommandOutput, nil, nil)
    90  	go func() {
    91  		for range aptCmds {
    92  		}
    93  	}()
    94  }
    95  
    96  func (s *upgradeSuite) TestLoginsDuringUpgrade(c *gc.C) {
    97  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
    98  
    99  	// Create machine agent to upgrade
   100  	machine, machine0Conf := s.makeStateAgentVersion(c, s.oldVersion)
   101  
   102  	// Set up a second machine to log in as. API logins are tested
   103  	// manually so there's no need to actually start this machine.
   104  	machine1, password := s.Factory.MakeMachineReturningPassword(c, &factory.MachineParams{
   105  		Nonce: agent.BootstrapNonce,
   106  	})
   107  	machine1Conf, _ := s.PrimeAgent(c, machine1.Tag(), password)
   108  
   109  	// Mock out upgrade logic, using a channel so that the test knows
   110  	// when upgrades have started and can control when upgrades
   111  	// should finish.
   112  	upgradeCh := make(chan bool)
   113  	upgradeChClosed := false
   114  	abort := make(chan bool)
   115  	fakePerformUpgrade := func(version.Number, []upgrades.Target, upgrades.Context) error {
   116  		// Signal that upgrade has started.
   117  		select {
   118  		case upgradeCh <- true:
   119  		case <-abort:
   120  			return nil
   121  		}
   122  
   123  		// Wait for signal that upgrades should finish.
   124  		select {
   125  		case <-upgradeCh:
   126  		case <-abort:
   127  			return nil
   128  		}
   129  		return nil
   130  	}
   131  	s.PatchValue(&upgradesteps.PerformUpgrade, fakePerformUpgrade)
   132  
   133  	a := s.newAgent(c, machine)
   134  	go func() { c.Check(a.Run(nil), jc.ErrorIsNil) }()
   135  	defer func() { c.Check(a.Stop(), jc.ErrorIsNil) }()
   136  
   137  	c.Assert(waitForUpgradeToStart(upgradeCh), jc.IsTrue)
   138  
   139  	// The test will hang if there's a failure in the assertions below
   140  	// and upgradeCh isn't closed.
   141  	defer func() {
   142  		if !upgradeChClosed {
   143  			close(upgradeCh)
   144  		}
   145  	}()
   146  
   147  	// Only user and local logins are allowed during upgrade. Users get a restricted API.
   148  	s.checkLoginToAPIAsUser(c, machine0Conf, RestrictedAPIExposed)
   149  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   150  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsFalse)
   151  
   152  	close(upgradeCh) // Allow upgrade to complete
   153  	upgradeChClosed = true
   154  
   155  	waitForUpgradeToFinish(c, machine0Conf)
   156  
   157  	// All logins are allowed after upgrade
   158  	s.checkLoginToAPIAsUser(c, machine0Conf, FullAPIExposed)
   159  	c.Assert(canLoginToAPIAsMachine(c, machine0Conf, machine0Conf), jc.IsTrue)
   160  	c.Assert(canLoginToAPIAsMachine(c, machine1Conf, machine0Conf), jc.IsTrue)
   161  }
   162  
   163  func (s *upgradeSuite) TestDowngradeOnMasterWhenOtherControllerDoesntStartUpgrade(c *gc.C) {
   164  	coretesting.SkipIfWindowsBug(c, "lp:1446885")
   165  
   166  	// This test checks that the master triggers a downgrade if one of
   167  	// the other controller fails to signal it is ready for upgrade.
   168  	//
   169  	// This test is functional, ensuring that the upgrader worker
   170  	// terminates the machine agent with the UpgradeReadyError which
   171  	// makes the downgrade happen.
   172  
   173  	// Provide (fake) tools so that the upgrader has something to downgrade to.
   174  	envtesting.AssertUploadFakeToolsVersions(
   175  		c, s.DefaultToolsStorage, s.Environ.Config().AgentStream(), s.Environ.Config().AgentStream(), s.oldVersion)
   176  
   177  	// Create 3 controllers
   178  	machineA, _ := s.makeStateAgentVersion(c, s.oldVersion)
   179  	// We're not going to start the agents for machines A or B - we
   180  	// need to make sure the API port is still set to the one picked
   181  	// for this machine after we create the other machines.
   182  	apiPort := s.ControllerConfig.APIPort()
   183  
   184  	changes, err := s.State.EnableHA(3, constraints.Value{}, "quantal", nil)
   185  	c.Assert(err, jc.ErrorIsNil)
   186  	c.Assert(len(changes.Added), gc.Equals, 2)
   187  	machineB, _, _ := s.configureMachine(c, changes.Added[0], s.oldVersion)
   188  	s.configureMachine(c, changes.Added[1], s.oldVersion)
   189  
   190  	s.SetControllerConfigAPIPort(c, apiPort)
   191  
   192  	// One of the other controllers is ready for upgrade (but machine C isn't).
   193  	info, err := s.State.EnsureUpgradeInfo(machineB.Id(), s.oldVersion.Number, jujuversion.Current)
   194  	c.Assert(err, jc.ErrorIsNil)
   195  
   196  	// Ensure the agent will think it's the master controller.
   197  	fakeIsMachineMaster := func(*state.StatePool, string) (bool, error) {
   198  		return true, nil
   199  	}
   200  	s.PatchValue(&upgradesteps.IsMachineMaster, fakeIsMachineMaster)
   201  
   202  	// Start the agent
   203  	agent := s.newAgent(c, machineA)
   204  	defer agent.Stop()
   205  	agentDone := make(chan error)
   206  	go func() {
   207  		agentDone <- agent.Run(nil)
   208  	}()
   209  
   210  	select {
   211  	case agentErr := <-agentDone:
   212  		upgradeReadyErr, ok := agentErr.(*upgrader.UpgradeReadyError)
   213  		if !ok {
   214  			c.Fatalf("didn't see UpgradeReadyError, instead got: %v", agentErr)
   215  		}
   216  		// Confirm that the downgrade is back to the previous version.
   217  		current := version.Binary{
   218  			Number: jujuversion.Current,
   219  			Arch:   arch.HostArch(),
   220  			Series: series.MustHostSeries(),
   221  		}
   222  		c.Assert(upgradeReadyErr.OldTools, gc.Equals, current)
   223  		c.Assert(upgradeReadyErr.NewTools, gc.Equals, s.oldVersion)
   224  
   225  	case <-time.After(coretesting.LongWait):
   226  		c.Fatal("machine agent did not exit as expected")
   227  	}
   228  
   229  	// UpgradeInfo doc should now be archived.
   230  	err = info.Refresh()
   231  	c.Assert(err, gc.ErrorMatches, "current upgrade info not found")
   232  }
   233  
   234  // TODO(mjs) - the following should maybe be part of AgentSuite
   235  func (s *upgradeSuite) newAgent(c *gc.C, m *state.Machine) *agentcmd.MachineAgent {
   236  	agentConf := agentcmd.NewAgentConf(s.DataDir())
   237  	agentConf.ReadConfig(m.Tag().String())
   238  	logger := logsender.NewBufferedLogWriter(1024)
   239  	s.AddCleanup(func(*gc.C) { logger.Close() })
   240  	machineAgentFactory := agentcmd.MachineAgentFactoryFn(
   241  		agentConf,
   242  		logger,
   243  		agentcmd.DefaultIntrospectionSocketName,
   244  		noPreUpgradeSteps,
   245  		c.MkDir(),
   246  	)
   247  	a, err := machineAgentFactory(m.Id())
   248  	c.Assert(err, jc.ErrorIsNil)
   249  	return a
   250  }
   251  
   252  func noPreUpgradeSteps(_ *state.StatePool, _ agent.Config, isController, isMaster bool) error {
   253  	return nil
   254  }
   255  
   256  // TODO(mjs) - the following should maybe be part of AgentSuite
   257  func (s *upgradeSuite) makeStateAgentVersion(c *gc.C, vers version.Binary) (*state.Machine, agent.ConfigSetterWriter) {
   258  	machine := s.Factory.MakeMachine(c, &factory.MachineParams{
   259  		Jobs:  []state.MachineJob{state.JobManageModel},
   260  		Nonce: agent.BootstrapNonce,
   261  	})
   262  	_, config, _ := s.configureMachine(c, machine.Id(), vers)
   263  	return machine, config
   264  }
   265  
   266  const initialMachinePassword = "machine-password-1234567890"
   267  
   268  // TODO(mjs) - the following should maybe be part of AgentSuite
   269  func (s *upgradeSuite) configureMachine(c *gc.C, machineId string, vers version.Binary) (
   270  	machine *state.Machine, agentConfig agent.ConfigSetterWriter, tools *tools.Tools,
   271  ) {
   272  	m, err := s.State.Machine(machineId)
   273  	c.Assert(err, jc.ErrorIsNil)
   274  
   275  	// Provision the machine if it isn't already
   276  	if _, err := m.InstanceId(); err != nil {
   277  		inst, md := jujutesting.AssertStartInstance(c, s.Environ, context.NewCloudCallContext(), s.ControllerConfig.ControllerUUID(), machineId)
   278  		c.Assert(m.SetProvisioned(inst.Id(), "", agent.BootstrapNonce, md), jc.ErrorIsNil)
   279  	}
   280  
   281  	// Make the machine live
   282  	pinger, err := m.SetAgentPresence()
   283  	c.Assert(err, jc.ErrorIsNil)
   284  	s.AddCleanup(func(c *gc.C) { pinger.Stop() })
   285  
   286  	// Set up the new machine.
   287  	err = m.SetAgentVersion(vers)
   288  	c.Assert(err, jc.ErrorIsNil)
   289  	err = m.SetPassword(initialMachinePassword)
   290  	c.Assert(err, jc.ErrorIsNil)
   291  	tag := m.Tag()
   292  	if m.IsManager() {
   293  		err = m.SetMongoPassword(initialMachinePassword)
   294  		c.Assert(err, jc.ErrorIsNil)
   295  		agentConfig, tools = s.PrimeStateAgentVersion(c, tag, initialMachinePassword, vers)
   296  		info, ok := agentConfig.StateServingInfo()
   297  		c.Assert(ok, jc.IsTrue)
   298  		ssi := cmdutil.ParamsStateServingInfoToStateStateServingInfo(info)
   299  		err = s.State.SetStateServingInfo(ssi)
   300  		c.Assert(err, jc.ErrorIsNil)
   301  	} else {
   302  		agentConfig, tools = s.PrimeAgentVersion(c, tag, initialMachinePassword, vers)
   303  	}
   304  	err = agentConfig.Write()
   305  	c.Assert(err, jc.ErrorIsNil)
   306  	return m, agentConfig, tools
   307  }
   308  
   309  func canLoginToAPIAsMachine(c *gc.C, fromConf, toConf agent.Config) bool {
   310  	fromInfo, ok := fromConf.APIInfo()
   311  	c.Assert(ok, jc.IsTrue)
   312  	toInfo, ok := toConf.APIInfo()
   313  	c.Assert(ok, jc.IsTrue)
   314  	fromInfo.Addrs = toInfo.Addrs
   315  	var err error
   316  	var apiState api.Connection
   317  	for a := ShortAttempt.Start(); a.Next(); {
   318  		apiState, err = api.Open(fromInfo, upgradeTestDialOpts)
   319  		// If space discovery is still in progress we retry.
   320  		if err != nil && strings.Contains(err.Error(), "spaces are still being discovered") {
   321  			if !a.HasNext() {
   322  				return false
   323  			}
   324  			continue
   325  		}
   326  		if apiState != nil {
   327  			apiState.Close()
   328  		}
   329  		break
   330  	}
   331  	return apiState != nil && err == nil
   332  }
   333  
   334  func (s *upgradeSuite) checkLoginToAPIAsUser(c *gc.C, conf agent.Config, expectFullAPI bool) {
   335  	var err error
   336  	// Multiple attempts may be necessary because there is a small gap
   337  	// between the post-upgrade version being written to the agent's
   338  	// config (as observed by waitForUpgradeToFinish) and the end of
   339  	// "upgrade mode" (i.e. when the agent's UpgradeComplete channel
   340  	// is closed). Without this tests that call checkLoginToAPIAsUser
   341  	// can occasionally fail.
   342  	for a := coretesting.LongAttempt.Start(); a.Next(); {
   343  		err = s.attemptRestrictedAPIAsUser(c, conf)
   344  		switch expectFullAPI {
   345  		case FullAPIExposed:
   346  			if err == nil {
   347  				return
   348  			}
   349  		case RestrictedAPIExposed:
   350  			if params.IsCodeUpgradeInProgress(err) {
   351  				return
   352  			}
   353  		}
   354  	}
   355  	c.Fatalf("timed out waiting for expected API behaviour. last error was: %v", err)
   356  }
   357  
   358  func (s *upgradeSuite) attemptRestrictedAPIAsUser(c *gc.C, conf agent.Config) error {
   359  	info, ok := conf.APIInfo()
   360  	c.Assert(ok, jc.IsTrue)
   361  	info.Tag = s.AdminUserTag(c)
   362  	info.Password = "dummy-secret"
   363  	info.Nonce = ""
   364  
   365  	apiState, err := api.Open(info, upgradeTestDialOpts)
   366  	if err != nil {
   367  		// If space discovery is in progress we'll get an error here
   368  		// and need to retry.
   369  		return err
   370  	}
   371  	defer apiState.Close()
   372  
   373  	// This call should always work, but might fail if the apiserver
   374  	// is restarting. If it fails just return the error so retries
   375  	// can continue.
   376  	err = apiState.APICall("Client", 1, "", "FullStatus", nil, new(params.FullStatus))
   377  	if err != nil {
   378  		return errors.Annotate(err, "FullStatus call")
   379  	}
   380  
   381  	// this call should only work if API is not restricted
   382  	err = apiState.APICall("Client", 1, "", "WatchAll", nil, nil)
   383  	return errors.Annotate(err, "WatchAll call")
   384  }
   385  
   386  var upgradeTestDialOpts = api.DialOpts{
   387  	Timeout:             2 * time.Minute,
   388  	RetryDelay:          250 * time.Millisecond,
   389  	DialAddressInterval: 50 * time.Millisecond,
   390  }
   391  
   392  func waitForUpgradeToStart(upgradeCh chan bool) bool {
   393  	select {
   394  	case <-upgradeCh:
   395  		return true
   396  	case <-time.After(coretesting.LongWait):
   397  		return false
   398  	}
   399  }
   400  
   401  func waitForUpgradeToFinish(c *gc.C, conf agent.Config) {
   402  	success := false
   403  	for attempt := coretesting.LongAttempt.Start(); attempt.Next(); {
   404  		diskConf := readConfigFromDisk(c, conf.DataDir(), conf.Tag())
   405  		success = diskConf.UpgradedToVersion() == jujuversion.Current
   406  		if success {
   407  			break
   408  		}
   409  	}
   410  	c.Assert(success, jc.IsTrue)
   411  }
   412  
   413  func readConfigFromDisk(c *gc.C, dir string, tag names.Tag) agent.Config {
   414  	conf, err := agent.ReadConfig(agent.ConfigPath(dir, tag))
   415  	c.Assert(err, jc.ErrorIsNil)
   416  	return conf
   417  }