github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/worker/provisioner/provisioner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner_test
     5  
     6  import (
     7  	stdcontext "context"
     8  	"fmt"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/juju/collections/set"
    14  	"github.com/juju/errors"
    15  	"github.com/juju/loggo"
    16  	"github.com/juju/names/v5"
    17  	jc "github.com/juju/testing/checkers"
    18  	"github.com/juju/utils/v3"
    19  	"github.com/juju/version/v2"
    20  	"github.com/juju/worker/v3"
    21  	"github.com/juju/worker/v3/workertest"
    22  	gc "gopkg.in/check.v1"
    23  
    24  	"github.com/juju/juju/agent"
    25  	"github.com/juju/juju/api"
    26  	apiprovisioner "github.com/juju/juju/api/agent/provisioner"
    27  	apiserverprovisioner "github.com/juju/juju/apiserver/facades/agent/provisioner"
    28  	"github.com/juju/juju/controller/authentication"
    29  	"github.com/juju/juju/core/arch"
    30  	"github.com/juju/juju/core/constraints"
    31  	"github.com/juju/juju/core/instance"
    32  	"github.com/juju/juju/core/life"
    33  	"github.com/juju/juju/core/model"
    34  	corenetwork "github.com/juju/juju/core/network"
    35  	coreos "github.com/juju/juju/core/os"
    36  	"github.com/juju/juju/core/os/ostype"
    37  	"github.com/juju/juju/core/status"
    38  	"github.com/juju/juju/environs"
    39  	"github.com/juju/juju/environs/config"
    40  	"github.com/juju/juju/environs/context"
    41  	"github.com/juju/juju/environs/filestorage"
    42  	"github.com/juju/juju/environs/imagemetadata"
    43  	imagetesting "github.com/juju/juju/environs/imagemetadata/testing"
    44  	"github.com/juju/juju/environs/instances"
    45  	"github.com/juju/juju/environs/simplestreams"
    46  	sstesting "github.com/juju/juju/environs/simplestreams/testing"
    47  	envtesting "github.com/juju/juju/environs/testing"
    48  	"github.com/juju/juju/environs/tools"
    49  	"github.com/juju/juju/juju/testing"
    50  	providercommon "github.com/juju/juju/provider/common"
    51  	"github.com/juju/juju/provider/dummy"
    52  	"github.com/juju/juju/rpc/params"
    53  	"github.com/juju/juju/state"
    54  	"github.com/juju/juju/state/cloudimagemetadata"
    55  	"github.com/juju/juju/storage"
    56  	"github.com/juju/juju/storage/poolmanager"
    57  	coretesting "github.com/juju/juju/testing"
    58  	coretools "github.com/juju/juju/tools"
    59  	"github.com/juju/juju/worker/provisioner"
    60  )
    61  
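        // CommonProvisionerSuite is the shared fixture for the provisioner tests:
        // it embeds JujuConnSuite and holds the dummy-provider operation channel,
        // the model config, an API connection logged in as machine 0, and the
        // provisioner facade that the workers under test talk to.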
    62  type CommonProvisionerSuite struct {
    63  	testing.JujuConnSuite
    64  	op  <-chan dummy.Operation
    65  	cfg *config.Config
    66  	// defaultConstraints are used when adding a machine and then later in test assertions.
    67  	defaultConstraints constraints.Value
    68  
    69  	st          api.Connection
    70  	provisioner *apiprovisioner.State
    71  	callCtx     context.ProviderCallContext
    72  }
    73  
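        // assertProvisionerObservesConfigChanges flips the model's harvest mode to
        // "all" and verifies that the provisioner under test observes the new value
        // through the injected config observer channel.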
    74  func (s *CommonProvisionerSuite) assertProvisionerObservesConfigChanges(c *gc.C, p provisioner.Provisioner) {
    75  	// Inject our observer into the provisioner
    76  	cfgObserver := make(chan *config.Config)
    77  	provisioner.SetObserver(p, cfgObserver)
    78  
    79  	// Switch to reaping on All machines.
    80  	attrs := map[string]interface{}{
    81  		config.ProvisionerHarvestModeKey: config.HarvestAll.String(),
    82  	}
    83  	err := s.Model.UpdateModelConfig(attrs, nil)
    84  	c.Assert(err, jc.ErrorIsNil)
    85  
    86  	// Wait for the PA to load the new configuration. We wait for the change we expect
    87  	// like this because sometimes we pick up the initial harvest config (destroyed)
    88  	// rather than the one we change to (all).
    89  	var received []string
    90  	timeout := time.After(coretesting.LongWait)
    91  	for {
    92  		select {
    93  		case newCfg := <-cfgObserver:
    94  			if newCfg.ProvisionerHarvestMode().String() == config.HarvestAll.String() {
    95  				return
    96  			}
    97  			received = append(received, newCfg.ProvisionerHarvestMode().String())
    98  		case <-time.After(coretesting.ShortWait):
    99  		case <-timeout:
   100  			if len(received) == 0 {
   101  				c.Fatalf("PA did not action config change")
   102  			} else {
   103  				c.Fatalf("timed out waiting for config to change to '%s', received %+v",
   104  					config.HarvestAll.String(), received)
   105  			}
   106  		}
   107  	}
   108  }
   109  
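        // assertProvisionerObservesConfigChangesWorkerCount changes the number of
        // (container) provisioning workers in model config and verifies that the
        // provisioner observes the new value through the injected observer channel.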
   110  func (s *CommonProvisionerSuite) assertProvisionerObservesConfigChangesWorkerCount(c *gc.C, p provisioner.Provisioner, container bool) {
   111  	// Inject our observer into the provisioner
   112  	cfgObserver := make(chan *config.Config)
   113  	provisioner.SetObserver(p, cfgObserver)
   114  
   115  	// Change the number of provisioning workers.
   116  	attrs := map[string]interface{}{}
   117  	if container {
   118  		attrs[config.NumContainerProvisionWorkersKey] = 10
   119  	} else {
   120  		attrs[config.NumProvisionWorkersKey] = 42
   121  	}
   122  	err := s.Model.UpdateModelConfig(attrs, nil)
   123  	c.Assert(err, jc.ErrorIsNil)
   124  
   125  	// Wait for the PA to load the new configuration. We wait for the change we expect
   126  	// like this because sometimes we pick up the initial worker count
   127  	// rather than the one we change to.
   128  	var received []int
   129  	timeout := time.After(coretesting.LongWait)
   130  	for {
   131  		select {
   132  		case newCfg := <-cfgObserver:
   133  			if container {
   134  				if newCfg.NumContainerProvisionWorkers() == 10 {
   135  					return
   136  				}
   137  				received = append(received, newCfg.NumContainerProvisionWorkers())
   138  			} else {
   139  				if newCfg.NumProvisionWorkers() == 42 {
   140  					return
   141  				}
   142  				received = append(received, newCfg.NumProvisionWorkers())
   143  			}
   144  		case <-timeout:
   145  			if len(received) == 0 {
   146  				c.Fatalf("PA did not action config change")
   147  			} else {
   148  				c.Fatalf("timed out waiting for worker count config change, received %+v",
   149  					received)
   150  			}
   151  		}
   152  	}
   153  }
   154  
   155  type ProvisionerSuite struct {
   156  	CommonProvisionerSuite
   157  }
   158  
   159  var _ = gc.Suite(&ProvisionerSuite{})
   160  
   161  func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) {
   162  	s.JujuConnSuite.SetUpSuite(c)
   163  	s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cores=1 root-disk=8G")
   164  }
   165  
   166  func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) {
   167  	s.JujuConnSuite.SetUpTest(c)
   168  
   169  	// We do not want to pull published image metadata for tests...
   170  	imagetesting.PatchOfficialDataSources(&s.CleanupSuite, "")
   171  	// We want an image to start test instances
   172  	err := s.State.CloudImageMetadataStorage.SaveMetadata([]cloudimagemetadata.Metadata{{
   173  		MetadataAttributes: cloudimagemetadata.MetadataAttributes{
   174  			Region:          "region",
   175  			Version:         "22.04",
   176  			Arch:            "amd64",
   177  			VirtType:        "",
   178  			RootStorageType: "",
   179  			Source:          "test",
   180  			Stream:          "released",
   181  		},
   182  		Priority: 10,
   183  		ImageId:  "-999",
   184  	}})
   185  	c.Assert(err, jc.ErrorIsNil)
   186  
   187  	// Create the operations channel with more than enough space
   188  	// for those tests that don't listen on it.
   189  	op := make(chan dummy.Operation, 500)
   190  	dummy.Listen(op)
   191  	s.op = op
   192  
   193  	cfg, err := s.Model.ModelConfig()
   194  	c.Assert(err, jc.ErrorIsNil)
   195  	s.cfg = cfg
   196  
   197  	s.callCtx = context.NewEmptyCloudCallContext()
   198  
   199  	// Create a machine for the dummy bootstrap instance,
   200  	// so the provisioner doesn't destroy it.
   201  	insts, err := s.Environ.Instances(s.callCtx, []instance.Id{dummy.BootstrapInstanceId})
   202  	c.Assert(err, jc.ErrorIsNil)
   203  	addrs, err := insts[0].Addresses(s.callCtx)
   204  	c.Assert(err, jc.ErrorIsNil)
   205  
   206  	pAddrs := make(corenetwork.SpaceAddresses, len(addrs))
   207  	for i, addr := range addrs {
   208  		pAddrs[i] = corenetwork.SpaceAddress{MachineAddress: addr.MachineAddress}
   209  	}
   210  
   211  	machine, err := s.State.AddOneMachine(state.MachineTemplate{
   212  		Addresses:  pAddrs,
   213  		Base:       state.UbuntuBase("12.10"),
   214  		Nonce:      agent.BootstrapNonce,
   215  		InstanceId: dummy.BootstrapInstanceId,
   216  		Jobs:       []state.MachineJob{state.JobManageModel},
   217  	})
   218  	c.Assert(err, jc.ErrorIsNil)
   219  	c.Assert(machine.Id(), gc.Equals, "0")
   220  
   221  	current := coretesting.CurrentVersion()
   222  	err = machine.SetAgentVersion(current)
   223  	c.Assert(err, jc.ErrorIsNil)
   224  
   225  	password, err := utils.RandomPassword()
   226  	c.Assert(err, jc.ErrorIsNil)
   227  	err = machine.SetPassword(password)
   228  	c.Assert(err, jc.ErrorIsNil)
   229  
   230  	s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, agent.BootstrapNonce)
   231  	c.Assert(s.st, gc.NotNil)
   232  	c.Logf("API: login as %q successful", machine.Tag())
   233  	s.provisioner = apiprovisioner.NewState(s.st)
   234  	c.Assert(s.provisioner, gc.NotNil)
   235  
   236  }
   237  
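        // startUnknownInstance starts an instance directly in the environ, bypassing
        // state, so that the provisioner sees it as an unknown instance.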
   238  func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instances.Instance {
   239  	instance, _ := testing.AssertStartInstance(c, s.Environ, s.callCtx, s.ControllerConfig.ControllerUUID(), id)
   240  	select {
   241  	case o := <-s.op:
   242  		switch o := o.(type) {
   243  		case dummy.OpStartInstance:
   244  		default:
   245  			c.Fatalf("unexpected operation %#v", o)
   246  		}
   247  	case <-time.After(coretesting.LongWait):
   248  		c.Fatalf("timed out waiting for startinstance operation")
   249  	}
   250  	return instance
   251  }
   252  
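        // checkStartInstance waits for the provisioner to start an instance for the
        // given machine with the suite's default constraints and returns it.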
   253  func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instances.Instance {
   254  	retVal := s.checkStartInstancesCustom(c, []*state.Machine{m}, "pork", s.defaultConstraints,
   255  		nil, nil, nil, nil, nil, nil, true)
   256  	return retVal[m.Id()]
   257  }
   258  
   259  func (s *CommonProvisionerSuite) checkStartInstanceCustom(
   260  	c *gc.C, m *state.Machine,
   261  	secret string, cons constraints.Value,
   262  	networkInfo corenetwork.InterfaceInfos,
   263  	subnetsToZones map[corenetwork.Id][]string,
   264  	rootDisk *storage.VolumeParams,
   265  	volumes []storage.Volume,
   266  	volumeAttachments []storage.VolumeAttachment,
   267  	checkPossibleTools coretools.List,
   268  	waitInstanceId bool,
   269  ) instances.Instance {
   270  	retVal := s.checkStartInstancesCustom(c, []*state.Machine{m},
   271  		secret, cons, networkInfo, subnetsToZones, rootDisk, volumes,
   272  		volumeAttachments, checkPossibleTools, waitInstanceId)
   273  	return retVal[m.Id()]
   274  }
   275  
   276  func (s *CommonProvisionerSuite) checkStartInstances(c *gc.C, machines []*state.Machine) map[string]instances.Instance {
   277  	return s.checkStartInstancesCustom(c, machines, "pork", s.defaultConstraints, nil, nil,
   278  		nil, nil, nil, nil, true)
   279  }
   280  
   281  // checkStartInstancesCustom takes a slice of machines and returns a
   282  // map of machine IDs to the instances started for them.
   283  func (s *CommonProvisionerSuite) checkStartInstancesCustom(
   284  	c *gc.C, machines []*state.Machine,
   285  	secret string, cons constraints.Value,
   286  	networkInfo corenetwork.InterfaceInfos,
   287  	subnetsToZones map[corenetwork.Id][]string,
   288  	rootDisk *storage.VolumeParams,
   289  	volumes []storage.Volume,
   290  	volumeAttachments []storage.VolumeAttachment,
   291  	checkPossibleTools coretools.List,
   292  	waitInstanceId bool,
   293  ) (
   294  	returnInstances map[string]instances.Instance,
   295  ) {
   296  	returnInstances = make(map[string]instances.Instance, len(machines))
   297  	found := 0
   298  	for {
   299  		select {
   300  		case o := <-s.op:
   301  			switch o := o.(type) {
   302  			case dummy.OpStartInstance:
   303  				inst := o.Instance
   304  
   305  				var m *state.Machine
   306  				for _, machine := range machines {
   307  					if machine.Id() == o.MachineId {
   308  						m = machine
   309  						found += 1
   310  						break
   311  					}
   312  				}
   313  				c.Assert(m, gc.NotNil)
   314  				if waitInstanceId {
   315  					s.waitInstanceId(c, m, inst.Id())
   316  				}
   317  
   318  				// Check the instance was started with the expected params.
   319  				c.Assert(o.MachineId, gc.Equals, m.Id())
   320  				nonceParts := strings.SplitN(o.MachineNonce, ":", 2)
   321  				c.Assert(nonceParts, gc.HasLen, 2)
   322  				c.Assert(nonceParts[0], gc.Equals, names.NewMachineTag("0").String())
   323  				c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString)
   324  				c.Assert(o.Secret, gc.Equals, secret)
   325  				c.Assert(o.SubnetsToZones, jc.DeepEquals, subnetsToZones)
   326  				c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo)
   327  				c.Assert(o.RootDisk, jc.DeepEquals, rootDisk)
   328  				c.Assert(o.Volumes, jc.DeepEquals, volumes)
   329  				c.Assert(o.VolumeAttachments, jc.DeepEquals, volumeAttachments)
   330  
   331  				var jobs []model.MachineJob
   332  				for _, job := range m.Jobs() {
   333  					jobs = append(jobs, job.ToParams())
   334  				}
   335  				c.Assert(o.Jobs, jc.SameContents, jobs)
   336  
   337  				if checkPossibleTools != nil {
   338  					for _, t := range o.PossibleTools {
   339  						url := fmt.Sprintf("https://%s/model/%s/tools/%s",
   340  							s.st.Addr(), coretesting.ModelTag.Id(), t.Version)
   341  						c.Check(t.URL, gc.Equals, url)
   342  						t.URL = ""
   343  					}
   344  					for _, t := range checkPossibleTools {
   345  						t.URL = ""
   346  					}
   347  					c.Assert(o.PossibleTools, gc.DeepEquals, checkPossibleTools)
   348  				}
   349  
   350  				// All provisioned machines in this test suite have
   351  				// their hardware characteristics attributes set to
   352  				// the same values as the constraints due to the dummy
   353  				// environment being used.
   354  				if !constraints.IsEmpty(&cons) {
   355  					c.Assert(o.Constraints, gc.DeepEquals, cons)
   356  					hc, err := m.HardwareCharacteristics()
   357  					c.Assert(err, jc.ErrorIsNil)
   358  					// At this point we don't care what the AvailabilityZone is,
   359  					// it can be a few different valid things.
   360  					zone := hc.AvailabilityZone
   361  					hc.AvailabilityZone = nil
   362  					c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{
   363  						Arch:     cons.Arch,
   364  						Mem:      cons.Mem,
   365  						RootDisk: cons.RootDisk,
   366  						CpuCores: cons.CpuCores,
   367  						CpuPower: cons.CpuPower,
   368  						Tags:     cons.Tags,
   369  					})
   370  					hc.AvailabilityZone = zone
   371  				}
   372  				returnInstances[m.Id()] = inst
   373  				if found == len(machines) {
   374  					return
   375  				}
   376  				break
   377  			default:
   378  				c.Logf("ignoring unexpected operation %#v", o)
   379  			}
   380  		case <-time.After(2 * time.Second):
   381  			c.Fatalf("provisioner did not start an instance")
   382  			return
   383  		}
   384  	}
   385  }
   386  
   387  // checkNoOperations checks that the environ was not operated upon.
   388  func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) {
   389  	select {
   390  	case o := <-s.op:
   391  		c.Fatalf("unexpected operation %+v", o)
   392  	case <-time.After(coretesting.ShortWait):
   393  		return
   394  	}
   395  }
   396  
   397  // checkStopInstances checks that an instance has been stopped.
   398  func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instances.Instance) {
   399  	s.checkStopSomeInstances(c, instances, nil)
   400  }
   401  
   402  // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not.
   403  func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C,
   404  	instancesToStop []instances.Instance, instancesToKeep []instances.Instance) {
   405  
   406  	instanceIdsToStop := set.NewStrings()
   407  	for _, instance := range instancesToStop {
   408  		instanceIdsToStop.Add(string(instance.Id()))
   409  	}
   410  	instanceIdsToKeep := set.NewStrings()
   411  	for _, instance := range instancesToKeep {
   412  		instanceIdsToKeep.Add(string(instance.Id()))
   413  	}
   414  	// Keep checking for stop-instance calls until all the instances we are
   415  	// waiting on have actually stopped, or we time out.
   416  	for !instanceIdsToStop.IsEmpty() {
   417  		select {
   418  		case o := <-s.op:
   419  			switch o := o.(type) {
   420  			case dummy.OpStopInstances:
   421  				for _, id := range o.Ids {
   422  					instId := string(id)
   423  					instanceIdsToStop.Remove(instId)
   424  					if instanceIdsToKeep.Contains(instId) {
   425  						c.Errorf("provisioner unexpectedly stopped instance %s", instId)
   426  					}
   427  				}
   428  			default:
   429  				c.Fatalf("unexpected operation %#v", o)
   430  				return
   431  			}
   432  		case <-time.After(2 * time.Second):
   433  			c.Fatalf("provisioner did not stop an instance")
   434  			return
   435  		}
   436  	}
   437  }
   438  
   439  func (s *CommonProvisionerSuite) waitForWatcher(c *gc.C, w state.NotifyWatcher, name string, check func() bool) {
   440  	// TODO(jam): We need to grow a new method on NotifyWatcherC
   441  	// that calls StartSync while waiting for changes, then
   442  	// waitMachine and waitHardwareCharacteristics can use that
   443  	// instead
   444  	defer workertest.CleanKill(c, w)
   445  	timeout := time.After(coretesting.LongWait)
   446  	resync := time.After(0)
   447  	for {
   448  		select {
   449  		case <-w.Changes():
   450  			if check() {
   451  				return
   452  			}
   453  		case <-resync:
   454  			resync = time.After(coretesting.ShortWait)
   455  
   456  		case <-timeout:
   457  			c.Fatalf("%v wait timed out", name)
   458  		}
   459  	}
   460  }
   461  
   462  func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) {
   463  	w := m.WatchInstanceData()
   464  	name := fmt.Sprintf("hardware characteristics for machine %v", m)
   465  	s.waitForWatcher(c, w, name, check)
   466  }
   467  
   468  // waitForRemovalMark waits for the supplied machine to be marked for removal.
   469  func (s *CommonProvisionerSuite) waitForRemovalMark(c *gc.C, m *state.Machine) {
   470  	w := s.BackingState.WatchMachineRemovals()
   471  	name := fmt.Sprintf("machine %v marked for removal", m)
   472  	s.waitForWatcher(c, w, name, func() bool {
   473  		removals, err := s.BackingState.AllMachineRemovals()
   474  		c.Assert(err, jc.ErrorIsNil)
   475  		for _, removal := range removals {
   476  			if removal == m.Id() {
   477  				return true
   478  			}
   479  		}
   480  		return false
   481  	})
   482  }
   483  
   484  // waitInstanceId waits until the supplied machine has an instance id, then
   485  // asserts it is as expected.
   486  func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) {
   487  	s.waitHardwareCharacteristics(c, m, func() bool {
   488  		if actual, err := m.InstanceId(); err == nil {
   489  			c.Assert(actual, gc.Equals, expect)
   490  			return true
   491  		} else if !errors.IsNotProvisioned(err) {
   492  			// We don't expect any errors.
   493  			panic(err)
   494  		} else {
   495  			c.Logf("got not provisioned error while waiting: %v", err)
   496  		}
   497  		return false
   498  	})
   499  }
   500  
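        // newEnvironProvisioner starts an environ provisioner worker using machine 0's
        // agent config, the suite's API connection and the dummy environ.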
   501  func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner {
   502  	machineTag := names.NewMachineTag("0")
   503  	agentConfig := s.AgentConfigForTag(c, machineTag)
   504  	apiState := apiprovisioner.NewState(s.st)
   505  	w, err := provisioner.NewEnvironProvisioner(apiState, agentConfig, loggo.GetLogger("test"), s.Environ, &credentialAPIForTest{})
   506  	c.Assert(err, jc.ErrorIsNil)
   507  	return w
   508  }
   509  
   510  func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) {
   511  	return s.addMachineWithConstraints(s.defaultConstraints)
   512  }
   513  
   514  func (s *CommonProvisionerSuite) addMachineWithConstraints(cons constraints.Value) (*state.Machine, error) {
   515  	return s.BackingState.AddOneMachine(state.MachineTemplate{
   516  		Base:        state.DefaultLTSBase(),
   517  		Jobs:        []state.MachineJob{state.JobHostUnits},
   518  		Constraints: cons,
   519  	})
   520  }
   521  
   522  func (s *CommonProvisionerSuite) addMachines(number int) ([]*state.Machine, error) {
   523  	templates := make([]state.MachineTemplate, number)
   524  	for i := range templates {
   525  		templates[i] = state.MachineTemplate{
   526  			Base:        state.DefaultLTSBase(),
   527  			Jobs:        []state.MachineJob{state.JobHostUnits},
   528  			Constraints: s.defaultConstraints,
   529  		}
   530  	}
   531  	return s.BackingState.AddMachines(templates...)
   532  }
   533  
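        // enableHA enables HA with n controllers and returns the machines that were
        // added as a result.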
   534  func (s *CommonProvisionerSuite) enableHA(c *gc.C, n int) []*state.Machine {
   535  	changes, err := s.BackingState.EnableHA(n, s.defaultConstraints, state.DefaultLTSBase(), nil)
   536  	c.Assert(err, jc.ErrorIsNil)
   537  	added := make([]*state.Machine, len(changes.Added))
   538  	for i, mid := range changes.Added {
   539  		m, err := s.BackingState.Machine(mid)
   540  		c.Assert(err, jc.ErrorIsNil)
   541  		added[i] = m
   542  	}
   543  	return added
   544  }
   545  
   546  func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) {
   547  	p := s.newEnvironProvisioner(c)
   548  	workertest.CleanKill(c, p)
   549  }
   550  
   551  func (s *ProvisionerSuite) TestSimple(c *gc.C) {
   552  	p := s.newEnvironProvisioner(c)
   553  	defer workertest.CleanKill(c, p)
   554  
   555  	// Check that an instance is provisioned when the machine is created...
   556  	m, err := s.addMachine()
   557  	c.Assert(err, jc.ErrorIsNil)
   558  	instance := s.checkStartInstance(c, m)
   559  
   560  	// ...and removed, along with the machine, when the machine is Dead.
   561  	c.Assert(m.EnsureDead(), gc.IsNil)
   562  	s.checkStopInstances(c, instance)
   563  	s.waitForRemovalMark(c, m)
   564  }
   565  
   566  func (s *ProvisionerSuite) TestConstraints(c *gc.C) {
   567  	// Create a machine with non-standard constraints.
   568  	m, err := s.addMachine()
   569  	c.Assert(err, jc.ErrorIsNil)
   570  	cons := constraints.MustParse("mem=8G arch=amd64 cores=2 root-disk=10G")
   571  	err = m.SetConstraints(cons)
   572  	c.Assert(err, jc.ErrorIsNil)
   573  
   574  	// Start a provisioner and check those constraints are used.
   575  	p := s.newEnvironProvisioner(c)
   576  	defer workertest.CleanKill(c, p)
   577  
   578  	s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, nil, nil, nil, nil, true)
   579  }
   580  
   581  func (s *ProvisionerSuite) TestPossibleTools(c *gc.C) {
   582  
   583  	storageDir := c.MkDir()
   584  	s.PatchValue(&tools.DefaultBaseURL, storageDir)
   585  	stor, err := filestorage.NewFileStorageWriter(storageDir)
   586  	c.Assert(err, jc.ErrorIsNil)
   587  	currentVersion := version.MustParseBinary("1.2.3-ubuntu-amd64")
   588  
   589  	// The current version is determined by the current model's agent
   590  	// version when locating tools to provision an added unit.
   591  	attrs := map[string]interface{}{
   592  		config.AgentVersionKey: currentVersion.Number.String(),
   593  	}
   594  	err = s.Model.UpdateModelConfig(attrs, nil)
   595  	c.Assert(err, jc.ErrorIsNil)
   596  
   597  	s.PatchValue(&arch.HostArch, func() string { return currentVersion.Arch })
   598  	s.PatchValue(&coreos.HostOS, func() ostype.OSType { return ostype.Ubuntu })
   599  
   600  	// Upload some plausible matches, and some that should be filtered out.
   601  	compatibleVersion := version.MustParseBinary("1.2.3-quantal-arm64")
   602  	ignoreVersion1 := version.MustParseBinary("1.2.4-ubuntu-arm64")
   603  	ignoreVersion2 := version.MustParseBinary("1.2.3-windows-arm64")
   604  	availableVersions := []version.Binary{
   605  		currentVersion, compatibleVersion, ignoreVersion1, ignoreVersion2,
   606  	}
   607  	envtesting.AssertUploadFakeToolsVersions(c, stor, s.cfg.AgentStream(), s.cfg.AgentStream(), availableVersions...)
   608  
   609  	// Extract the tools that we expect to actually match.
   610  	ss := simplestreams.NewSimpleStreams(sstesting.TestDataSourceFactory())
   611  	expectedList, err := tools.FindTools(ss, s.Environ, -1, -1, []string{s.cfg.AgentStream()}, coretools.Filter{
   612  		Number: currentVersion.Number,
   613  		OSType: "ubuntu",
   614  	})
   615  	c.Assert(err, jc.ErrorIsNil)
   616  
   617  	// Create the machine and check the tools that get passed into StartInstance.
   618  	machine, err := s.BackingState.AddOneMachine(state.MachineTemplate{
   619  		Base: state.UbuntuBase("12.10"),
   620  		Jobs: []state.MachineJob{state.JobHostUnits},
   621  	})
   622  	c.Assert(err, jc.ErrorIsNil)
   623  
   624  	provisioner := s.newEnvironProvisioner(c)
   625  	defer workertest.CleanKill(c, provisioner)
   626  	s.checkStartInstanceCustom(
   627  		c, machine, "pork", constraints.Value{},
   628  		nil, nil, nil, nil, nil, expectedList, true,
   629  	)
   630  }
   631  
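        // validCloudInitUserData is a cloud-init snippet used as the value of the
        // config.CloudInitUserDataKey model attribute in TestSetUpToStartMachine.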
   632  var validCloudInitUserData = `
   633  packages:
   634    - 'python-keystoneclient'
   635    - 'python-glanceclient'
   636  preruncmd:
   637    - mkdir /tmp/preruncmd
   638    - mkdir /tmp/preruncmd2
   639  postruncmd:
   640    - mkdir /tmp/postruncmd
   641    - mkdir /tmp/postruncmd2
   642  package_upgrade: false
   643  `[1:]
   644  
   645  func (s *ProvisionerSuite) TestSetUpToStartMachine(c *gc.C) {
   646  	attrs := map[string]interface{}{
   647  		config.CloudInitUserDataKey: validCloudInitUserData,
   648  	}
   649  
   650  	err := s.Model.UpdateModelConfig(attrs, nil)
   651  	c.Assert(err, jc.ErrorIsNil)
   652  
   653  	task := s.newProvisionerTask(
   654  		c,
   655  		config.HarvestAll,
   656  		s.Environ,
   657  		s.provisioner,
   658  		&mockDistributionGroupFinder{},
   659  		mockToolsFinder{},
   660  	)
   661  	defer workertest.CleanKill(c, task)
   662  
   663  	machine, err := s.addMachine()
   664  	c.Assert(err, jc.ErrorIsNil)
   665  
   666  	mRes, err := s.provisioner.Machines(machine.MachineTag())
   667  	c.Assert(err, gc.IsNil)
   668  	c.Assert(mRes, gc.HasLen, 1)
   669  	c.Assert(mRes[0].Err, gc.IsNil)
   670  	apiMachine := mRes[0].Machine
   671  
   672  	pRes, err := s.provisioner.ProvisioningInfo([]names.MachineTag{machine.MachineTag()})
   673  	c.Assert(err, gc.IsNil)
   674  	c.Assert(pRes.Results, gc.HasLen, 1)
   675  
   676  	v, err := apiMachine.ModelAgentVersion()
   677  	c.Assert(err, jc.ErrorIsNil)
   678  
   679  	startInstanceParams, err := provisioner.SetupToStartMachine(task, apiMachine, v, pRes.Results[0])
   680  	c.Assert(err, jc.ErrorIsNil)
   681  	cloudInitUserData := startInstanceParams.InstanceConfig.CloudInitUserData
   682  	c.Assert(cloudInitUserData, gc.DeepEquals, map[string]interface{}{
   683  		"packages":        []interface{}{"python-keystoneclient", "python-glanceclient"},
   684  		"preruncmd":       []interface{}{"mkdir /tmp/preruncmd", "mkdir /tmp/preruncmd2"},
   685  		"postruncmd":      []interface{}{"mkdir /tmp/postruncmd", "mkdir /tmp/postruncmd2"},
   686  		"package_upgrade": false},
   687  	)
   688  }
   689  
   690  func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) {
   691  	p := s.newEnvironProvisioner(c)
   692  	defer workertest.CleanKill(c, p)
   693  
   694  	// Check that an instance is not provisioned when the machine is created...
   695  	m, err := s.BackingState.AddOneMachine(state.MachineTemplate{
   696  		// We need a valid base that has no tools uploaded.
   697  		Base:        state.Base{OS: "centos", Channel: "7"},
   698  		Jobs:        []state.MachineJob{state.JobHostUnits},
   699  		Constraints: s.defaultConstraints,
   700  	})
   701  	c.Assert(err, jc.ErrorIsNil)
   702  	s.checkNoOperations(c)
   703  
   704  	// Ensure machine error status was set, and the error matches
   705  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m)
   706  	c.Check(agentStatus.Status, gc.Equals, status.Error)
   707  	c.Check(agentStatus.Message, gc.Equals, "no matching agent binaries available")
   708  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
   709  	c.Check(instanceStatus.Message, gc.Equals, "no matching agent binaries available")
   710  
   711  	// Restart the PA to make sure the machine is skipped again.
   712  	workertest.CleanKill(c, p)
   713  	p = s.newEnvironProvisioner(c)
   714  	defer workertest.CleanKill(c, p)
   715  	s.checkNoOperations(c)
   716  }
   717  
   718  func (s *ProvisionerSuite) waitUntilMachineNotPending(c *gc.C, m *state.Machine) (status.StatusInfo, status.StatusInfo) {
   719  	t0 := time.Now()
   720  	for time.Since(t0) < 10*coretesting.LongWait {
   721  		agentStatusInfo, err := m.Status()
   722  		c.Assert(err, jc.ErrorIsNil)
   723  		if agentStatusInfo.Status == status.Pending {
   724  			time.Sleep(coretesting.ShortWait)
   725  			continue
   726  		}
   727  		instanceStatusInfo, err := m.InstanceStatus()
   728  		c.Assert(err, jc.ErrorIsNil)
   729  		// officially InstanceStatus is only supposed to be Provisioning, but
   730  		// all current Providers have their unknown state as Pending.
   731  		if instanceStatusInfo.Status == status.Provisioning ||
   732  			instanceStatusInfo.Status == status.Pending {
   733  			time.Sleep(coretesting.ShortWait)
   734  			continue
   735  		}
   736  		return agentStatusInfo, instanceStatusInfo
   737  	}
   738  	c.Fatalf("machine %q stayed in pending", m.Id())
   739  	// Unreachable: Fatalf panics, but the compiler still requires a return value.
   740  	return status.StatusInfo{}, status.StatusInfo{}
   741  }
   742  
   743  func (s *ProvisionerSuite) TestProvisionerFailedStartInstanceWithInjectedCreationError(c *gc.C) {
   744  	// Set the retry delay to 0, and retry count to 2 to keep tests short
   745  	s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second)
   746  	s.PatchValue(provisioner.RetryStrategyCount, 2)
   747  
   748  	// create the error injection channel
   749  	errorInjectionChannel := make(chan error, 3)
   750  
   751  	p := s.newEnvironProvisioner(c)
   752  	defer workertest.CleanKill(c, p)
   753  
   754  	// patch the dummy provider error injection channel
   755  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   756  	defer cleanup()
   757  
   758  	retryableError := environs.ZoneIndependentError(
   759  		errors.New("container failed to start and was destroyed"),
   760  	)
   761  	destroyError := environs.ZoneIndependentError(
   762  		errors.New("container failed to start and failed to destroy: manual cleanup of containers needed"),
   763  	)
   764  	// send the error message three times, because the provisioner will retry twice as patched above.
   765  	errorInjectionChannel <- retryableError
   766  	errorInjectionChannel <- retryableError
   767  	errorInjectionChannel <- destroyError
   768  
   769  	m, err := s.addMachine()
   770  	c.Assert(err, jc.ErrorIsNil)
   771  	s.checkNoOperations(c)
   772  
   773  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m)
   774  	// check that the status matches the error message
   775  	c.Check(agentStatus.Status, gc.Equals, status.Error)
   776  	c.Check(agentStatus.Message, gc.Equals, destroyError.Error())
   777  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
   778  	c.Check(instanceStatus.Message, gc.Equals, destroyError.Error())
   779  }
   780  
   781  func (s *ProvisionerSuite) TestProvisionerSucceedStartInstanceWithInjectedRetryableCreationError(c *gc.C) {
   782  	// Set the retry delay to 0, and retry count to 2 to keep tests short
   783  	s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second)
   784  	s.PatchValue(provisioner.RetryStrategyCount, 2)
   785  
   786  	// create the error injection channel
   787  	errorInjectionChannel := make(chan error, 1)
   788  	c.Assert(errorInjectionChannel, gc.NotNil)
   789  
   790  	p := s.newEnvironProvisioner(c)
   791  	defer workertest.CleanKill(c, p)
   792  
   793  	// patch the dummy provider error injection channel
   794  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   795  	defer cleanup()
   796  
   797  	// send the error message once
   798  	// - instance creation should succeed
   799  	retryableError := errors.New("container failed to start and was destroyed")
   800  	errorInjectionChannel <- retryableError
   801  
   802  	m, err := s.addMachine()
   803  	c.Assert(err, jc.ErrorIsNil)
   804  	s.checkStartInstance(c, m)
   805  }
   806  
   807  func (s *ProvisionerSuite) TestProvisionerStopRetryingIfDying(c *gc.C) {
   808  	// Create the error injection channel and inject
   809  	// a retryable error
   810  	errorInjectionChannel := make(chan error, 1)
   811  
   812  	p := s.newEnvironProvisioner(c)
   813  	// Don't defer the stop; we will manually stop and verify the result.
   814  
   815  	// patch the dummy provider error injection channel
   816  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   817  	defer cleanup()
   818  
   819  	retryableError := errors.New("container failed to start and was destroyed")
   820  	errorInjectionChannel <- retryableError
   821  
   822  	m, err := s.addMachine()
   823  	c.Assert(err, jc.ErrorIsNil)
   824  
   825  	time.Sleep(coretesting.ShortWait)
   826  
   827  	workertest.CleanKill(c, p)
   828  	statusInfo, err := m.Status()
   829  	c.Assert(err, jc.ErrorIsNil)
   830  	c.Check(statusInfo.Status, gc.Equals, status.Pending)
   831  	statusInfo, err = m.InstanceStatus()
   832  	c.Assert(err, jc.ErrorIsNil)
   833  	if statusInfo.Status != status.Pending && statusInfo.Status != status.Provisioning {
   834  		c.Errorf("statusInfo.Status was %q not one of %q or %q",
   835  			statusInfo.Status, status.Pending, status.Provisioning)
   836  	}
   837  	s.checkNoOperations(c)
   838  }
   839  
   840  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForLXD(c *gc.C) {
   841  	p := s.newEnvironProvisioner(c)
   842  	defer workertest.CleanKill(c, p)
   843  
   844  	// create a machine to host the container.
   845  	m, err := s.addMachine()
   846  	c.Assert(err, jc.ErrorIsNil)
   847  	inst := s.checkStartInstance(c, m)
   848  
   849  	// make a container on the machine we just created
   850  	template := state.MachineTemplate{
   851  		Base: state.DefaultLTSBase(),
   852  		Jobs: []state.MachineJob{state.JobHostUnits},
   853  	}
   854  	container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXD)
   855  	c.Assert(err, jc.ErrorIsNil)
   856  
   857  	// the PA should not attempt to create it
   858  	s.checkNoOperations(c)
   859  
   860  	// cleanup
   861  	c.Assert(container.EnsureDead(), gc.IsNil)
   862  	c.Assert(container.Remove(), gc.IsNil)
   863  	c.Assert(m.EnsureDead(), gc.IsNil)
   864  	s.checkStopInstances(c, inst)
   865  	s.waitForRemovalMark(c, m)
   866  }
   867  
   868  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForKVM(c *gc.C) {
   869  	p := s.newEnvironProvisioner(c)
   870  	defer workertest.CleanKill(c, p)
   871  
   872  	// create a machine to host the container.
   873  	m, err := s.addMachine()
   874  	c.Assert(err, jc.ErrorIsNil)
   875  	inst := s.checkStartInstance(c, m)
   876  
   877  	// make a container on the machine we just created
   878  	template := state.MachineTemplate{
   879  		Base: state.DefaultLTSBase(),
   880  		Jobs: []state.MachineJob{state.JobHostUnits},
   881  	}
   882  	container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.KVM)
   883  	c.Assert(err, jc.ErrorIsNil)
   884  
   885  	// the PA should not attempt to create it
   886  	s.checkNoOperations(c)
   887  
   888  	// cleanup
   889  	c.Assert(container.EnsureDead(), gc.IsNil)
   890  	c.Assert(container.Remove(), gc.IsNil)
   891  	c.Assert(m.EnsureDead(), gc.IsNil)
   892  	s.checkStopInstances(c, inst)
   893  	s.waitForRemovalMark(c, m)
   894  }
   895  
   896  type MachineClassifySuite struct {
   897  }
   898  
   899  var _ = gc.Suite(&MachineClassifySuite{})
   900  
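        // MockMachine is a stub machine for the classification tests; it returns the
        // canned life, status, id and errors configured in its fields.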
   901  type MockMachine struct {
   902  	life          life.Value
   903  	status        status.Status
   904  	id            string
   905  	idErr         error
   906  	ensureDeadErr error
   907  	statusErr     error
   908  }
   909  
   910  func (m *MockMachine) Life() life.Value {
   911  	return m.life
   912  }
   913  
   914  func (m *MockMachine) InstanceId() (instance.Id, error) {
   915  	return instance.Id(m.id), m.idErr
   916  }
   917  
   918  func (m *MockMachine) InstanceNames() (instance.Id, string, error) {
   919  	instId, err := m.InstanceId()
   920  	return instId, "", err
   921  }
   922  
   923  func (m *MockMachine) EnsureDead() error {
   924  	return m.ensureDeadErr
   925  }
   926  
   927  func (m *MockMachine) Status() (status.Status, string, error) {
   928  	return m.status, "", m.statusErr
   929  }
   930  
   931  func (m *MockMachine) InstanceStatus() (status.Status, string, error) {
   932  	return m.status, "", m.statusErr
   933  }
   934  
   935  func (m *MockMachine) Id() string {
   936  	return m.id
   937  }
   938  
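        // machineClassificationTest describes a single table-driven case for
        // TestMachineClassification.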
   939  type machineClassificationTest struct {
   940  	description    string
   941  	life           life.Value
   942  	status         status.Status
   943  	idErr          string
   944  	ensureDeadErr  string
   945  	expectErrCode  string
   946  	expectErrFmt   string
   947  	statusErr      string
   948  	classification provisioner.MachineClassification
   949  }
   950  
   951  var machineClassificationTestsNoMaintenance = machineClassificationTest{
   952  	description:    "Machine doesn't need maintaining",
   953  	life:           life.Alive,
   954  	status:         status.Started,
   955  	classification: provisioner.None,
   956  }
   957  
   958  func (s *MachineClassifySuite) TestMachineClassification(c *gc.C) {
   959  	test := func(t machineClassificationTest, id string) {
   960  		// Run a sub-test from the test table
   961  		s2e := func(s string) error {
   962  			// Little helper to turn a non-empty string into a useful error for "ErrorMatches"
   963  			if s != "" {
   964  				return &params.Error{Code: s}
   965  			}
   966  			return nil
   967  		}
   968  
   969  		c.Logf("%s: %s", id, t.description)
   970  		machine := MockMachine{t.life, t.status, id, s2e(t.idErr), s2e(t.ensureDeadErr), s2e(t.statusErr)}
   971  		classification, err := provisioner.ClassifyMachine(loggo.GetLogger("test"), &machine)
   972  		if err != nil {
   973  			c.Assert(err, gc.ErrorMatches, fmt.Sprintf(t.expectErrFmt, machine.Id()))
   974  		} else {
   975  			c.Assert(err, gc.Equals, s2e(t.expectErrCode))
   976  		}
   977  		c.Assert(classification, gc.Equals, t.classification)
   978  	}
   979  
   980  	test(machineClassificationTestsNoMaintenance, "0")
   981  }
   982  
   983  func (s *ProvisionerSuite) TestProvisioningMachinesWithSpacesSuccess(c *gc.C) {
   984  	p := s.newEnvironProvisioner(c)
   985  	defer workertest.CleanKill(c, p)
   986  
   987  	// Add the spaces used in constraints.
   988  	space1, err := s.State.AddSpace("space1", "", nil, false)
   989  	c.Assert(err, jc.ErrorIsNil)
   990  	space2, err := s.State.AddSpace("space2", "", nil, false)
   991  	c.Assert(err, jc.ErrorIsNil)
   992  
   993  	// Add 1 subnet into space1, and 2 into space2.
   994  	// Each subnet is in a matching zone (e.g. "subnet-#" in "zone#").
   995  	testing.AddSubnetsWithTemplate(c, s.State, 3, corenetwork.SubnetInfo{
   996  		CIDR:              "10.10.{{.}}.0/24",
   997  		ProviderId:        "subnet-{{.}}",
   998  		AvailabilityZones: []string{"zone{{.}}"},
   999  		SpaceID:           fmt.Sprintf("{{if (lt . 2)}}%s{{else}}%s{{end}}", space1.Id(), space2.Id()),
  1000  		VLANTag:           42,
  1001  	})
  1002  
  1003  	// Add and provision a machine with spaces specified.
  1004  	cons := constraints.MustParse(
  1005  		s.defaultConstraints.String(), "spaces=space2,^space1",
  1006  	)
  1007  	// The dummy provider simulates 2 subnets per included space.
  1008  	expectedSubnetsToZones := map[corenetwork.Id][]string{
  1009  		"subnet-0": {"zone0"},
  1010  		"subnet-1": {"zone1"},
  1011  	}
  1012  	m, err := s.addMachineWithConstraints(cons)
  1013  	c.Assert(err, jc.ErrorIsNil)
  1014  	inst := s.checkStartInstanceCustom(
  1015  		c, m, "pork", cons,
  1016  		nil,
  1017  		expectedSubnetsToZones,
  1018  		nil, nil, nil, nil, true,
  1019  	)
  1020  
  1021  	// Cleanup.
  1022  	c.Assert(m.EnsureDead(), gc.IsNil)
  1023  	s.checkStopInstances(c, inst)
  1024  	s.waitForRemovalMark(c, m)
  1025  }
  1026  
  1027  func (s *ProvisionerSuite) testProvisioningFailsAndSetsErrorStatusForConstraints(
  1028  	c *gc.C,
  1029  	cons constraints.Value,
  1030  	expectedErrorStatus string,
  1031  ) {
  1032  	machine, err := s.addMachineWithConstraints(cons)
  1033  	c.Assert(err, jc.ErrorIsNil)
  1034  
  1035  	// Start the PA.
  1036  	p := s.newEnvironProvisioner(c)
  1037  	defer workertest.CleanKill(c, p)
  1038  
  1039  	// Expect StartInstance to fail.
  1040  	s.checkNoOperations(c)
  1041  
  1042  	// Ensure machine error status was set, and the error matches
  1043  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, machine)
  1044  	c.Check(agentStatus.Status, gc.Equals, status.Error)
  1045  	c.Check(agentStatus.Message, gc.Equals, expectedErrorStatus)
  1046  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
  1047  	c.Check(instanceStatus.Message, gc.Equals, expectedErrorStatus)
  1048  
  1049  	// Make sure the task didn't stop with an error
  1050  	died := make(chan error)
  1051  	go func() {
  1052  		died <- p.Wait()
  1053  	}()
  1054  	select {
  1055  	case <-time.After(coretesting.ShortWait):
  1056  	case err := <-died:
  1057  		c.Fatalf("provisioner task died unexpectedly with err: %v", err)
  1058  	}
  1059  
  1060  	// Restart the PA to make sure the machine is not retried.
  1061  	workertest.CleanKill(c, p)
  1062  	p = s.newEnvironProvisioner(c)
  1063  	defer workertest.CleanKill(c, p)
  1064  
  1065  	s.checkNoOperations(c)
  1066  }
  1067  
  1068  func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithUnknownSpaces(c *gc.C) {
  1069  	cons := constraints.MustParse(
  1070  		s.defaultConstraints.String(), "spaces=missing,missing-too,^ignored-too",
  1071  	)
  1072  	expectedErrorStatus := `matching subnets to zones: space "missing" not found`
  1073  	s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus)
  1074  }
  1075  
  1076  func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithEmptySpaces(c *gc.C) {
  1077  	_, err := s.State.AddSpace("empty", "", nil, false)
  1078  	c.Assert(err, jc.ErrorIsNil)
  1079  	cons := constraints.MustParse(
  1080  		s.defaultConstraints.String(), "spaces=empty",
  1081  	)
  1082  	expectedErrorStatus := `matching subnets to zones: ` +
  1083  		`cannot use space "empty" as deployment target: no subnets`
  1084  	s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus)
  1085  }
  1086  
  1087  func (s *CommonProvisionerSuite) addMachineWithRequestedVolumes(volumes []state.HostVolumeParams, cons constraints.Value) (*state.Machine, error) {
  1088  	return s.BackingState.AddOneMachine(state.MachineTemplate{
  1089  		Base:        state.DefaultLTSBase(),
  1090  		Jobs:        []state.MachineJob{state.JobHostUnits},
  1091  		Constraints: cons,
  1092  		Volumes:     volumes,
  1093  	})
  1094  }
  1095  
  1096  func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedRootDisk(c *gc.C) {
  1097  	// Set up a persistent pool.
  1098  	poolManager := poolmanager.New(state.NewStateSettings(s.State), s.Environ)
  1099  	_, err := poolManager.Create("persistent-pool", "static", map[string]interface{}{"persistent": true})
  1100  	c.Assert(err, jc.ErrorIsNil)
  1101  
  1102  	p := s.newEnvironProvisioner(c)
  1103  	defer workertest.CleanKill(c, p)
  1104  
  1105  	cons := constraints.MustParse("root-disk-source=persistent-pool " + s.defaultConstraints.String())
  1106  	m, err := s.BackingState.AddOneMachine(state.MachineTemplate{
  1107  		Base:        state.DefaultLTSBase(),
  1108  		Jobs:        []state.MachineJob{state.JobHostUnits},
  1109  		Constraints: cons,
  1110  	})
  1111  	c.Assert(err, jc.ErrorIsNil)
  1112  
  1113  	inst := s.checkStartInstanceCustom(
  1114  		c, m, "pork", cons,
  1115  		nil, nil,
  1116  		&storage.VolumeParams{
  1117  			Provider:   "static",
  1118  			Attributes: map[string]interface{}{"persistent": true},
  1119  		},
  1120  		nil,
  1121  		nil,
  1122  		nil, true,
  1123  	)
  1124  
  1125  	// Cleanup.
  1126  	c.Assert(m.EnsureDead(), gc.IsNil)
  1127  	s.checkStopInstances(c, inst)
  1128  	s.waitForRemovalMark(c, m)
  1129  }
  1130  
  1131  func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedVolumes(c *gc.C) {
  1132  	// Set up a persistent pool.
  1133  	poolManager := poolmanager.New(state.NewStateSettings(s.State), s.Environ)
  1134  	_, err := poolManager.Create("persistent-pool", "static", map[string]interface{}{"persistent": true})
  1135  	c.Assert(err, jc.ErrorIsNil)
  1136  
  1137  	p := s.newEnvironProvisioner(c)
  1138  	defer workertest.CleanKill(c, p)
  1139  
  1140  	// Add a machine with volumes to state.
  1141  	requestedVolumes := []state.HostVolumeParams{{
  1142  		Volume:     state.VolumeParams{Pool: "static", Size: 1024},
  1143  		Attachment: state.VolumeAttachmentParams{},
  1144  	}, {
  1145  		Volume:     state.VolumeParams{Pool: "persistent-pool", Size: 2048},
  1146  		Attachment: state.VolumeAttachmentParams{},
  1147  	}, {
  1148  		Volume:     state.VolumeParams{Pool: "persistent-pool", Size: 4096},
  1149  		Attachment: state.VolumeAttachmentParams{},
  1150  	}}
  1151  	m, err := s.addMachineWithRequestedVolumes(requestedVolumes, s.defaultConstraints)
  1152  	c.Assert(err, jc.ErrorIsNil)
  1153  
  1154  	// Provision volume-2, so that it is attached rather than created.
  1155  	sb, err := state.NewStorageBackend(s.State)
  1156  	c.Assert(err, jc.ErrorIsNil)
  1157  	err = sb.SetVolumeInfo(names.NewVolumeTag("2"), state.VolumeInfo{
  1158  		Pool:     "persistent-pool",
  1159  		VolumeId: "vol-ume",
  1160  		Size:     4096,
  1161  	})
  1162  	c.Assert(err, jc.ErrorIsNil)
  1163  
  1164  	// Provision the machine, checking the volume and volume attachment arguments.
  1165  	expectedVolumes := []storage.Volume{{
  1166  		names.NewVolumeTag("0"),
  1167  		storage.VolumeInfo{
  1168  			Size: 1024,
  1169  		},
  1170  	}, {
  1171  		names.NewVolumeTag("1"),
  1172  		storage.VolumeInfo{
  1173  			Size:       2048,
  1174  			Persistent: true,
  1175  		},
  1176  	}}
  1177  	expectedVolumeAttachments := []storage.VolumeAttachment{{
  1178  		Volume:  names.NewVolumeTag("2"),
  1179  		Machine: m.MachineTag(),
  1180  		VolumeAttachmentInfo: storage.VolumeAttachmentInfo{
  1181  			DeviceName: "sdb",
  1182  		},
  1183  	}}
  1184  	inst := s.checkStartInstanceCustom(
  1185  		c, m, "pork", s.defaultConstraints,
  1186  		nil, nil, nil,
  1187  		expectedVolumes,
  1188  		expectedVolumeAttachments,
  1189  		nil, true,
  1190  	)
  1191  
  1192  	// Cleanup.
  1193  	c.Assert(m.EnsureDead(), gc.IsNil)
  1194  	s.checkStopInstances(c, inst)
  1195  	s.waitForRemovalMark(c, m)
  1196  }
  1197  
  1198  func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) {
  1199  	p := s.newEnvironProvisioner(c)
  1200  	defer workertest.CleanKill(c, p)
  1201  
  1202  	// create a machine
  1203  	m, err := s.addMachine()
  1204  	c.Assert(err, jc.ErrorIsNil)
  1205  	s.checkStartInstance(c, m)
  1206  
  1207  	// restart the PA
  1208  	workertest.CleanKill(c, p)
  1209  	p = s.newEnvironProvisioner(c)
  1210  	defer workertest.CleanKill(c, p)
  1211  
  1212  	// check that there is only one machine provisioned.
  1213  	machines, err := s.State.AllMachines()
  1214  	c.Assert(err, jc.ErrorIsNil)
  1215  	c.Check(len(machines), gc.Equals, 2)
  1216  	c.Check(machines[0].Id(), gc.Equals, "0")
  1217  	c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse)
  1218  
  1219  	// the PA should not create it a second time
  1220  	s.checkNoOperations(c)
  1221  }
  1222  
  1223  func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) {
  1224  	p := s.newEnvironProvisioner(c)
  1225  	defer workertest.CleanKill(c, p)
  1226  
  1227  	// provision a machine
  1228  	m0, err := s.addMachine()
  1229  	c.Assert(err, jc.ErrorIsNil)
  1230  	s.checkStartInstance(c, m0)
  1231  
  1232  	// stop the provisioner and make the machine dying
  1233  	workertest.CleanKill(c, p)
  1234  	err = m0.Destroy()
  1235  	c.Assert(err, jc.ErrorIsNil)
  1236  
  1237  	// add a new, dying, unprovisioned machine
  1238  	m1, err := s.addMachine()
  1239  	c.Assert(err, jc.ErrorIsNil)
  1240  	err = m1.Destroy()
  1241  	c.Assert(err, jc.ErrorIsNil)
  1242  
  1243  	// start the provisioner and wait for it to reap the useless machine
  1244  	p = s.newEnvironProvisioner(c)
  1245  	defer workertest.CleanKill(c, p)
  1246  	s.checkNoOperations(c)
  1247  	s.waitForRemovalMark(c, m1)
  1248  
  1249  	// verify the other one's still fine
  1250  	err = m0.Refresh()
  1251  	c.Assert(err, jc.ErrorIsNil)
  1252  	c.Assert(m0.Life(), gc.Equals, state.Dying)
  1253  }
  1254  
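        // mockTaskAPI embeds provisioner.TaskAPI but fails machine retrieval, so tests
        // can check that the provisioner task does not stop instances when it cannot
        // get machines.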
  1255  type mockTaskAPI struct {
  1256  	provisioner.TaskAPI
  1257  }
  1258  
  1259  func (mock *mockTaskAPI) Machines(tags ...names.MachineTag) ([]apiprovisioner.MachineResult, error) {
  1260  	return nil, fmt.Errorf("error")
  1261  }
  1262  
  1263  func (*mockTaskAPI) MachinesWithTransientErrors() ([]apiprovisioner.MachineStatusResult, error) {
  1264  	return nil, fmt.Errorf("error")
  1265  }
  1266  
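        // mockDistributionGroupFinder returns canned distribution groups keyed by
        // machine tag. With no groups configured every machine gets an empty group;
        // otherwise machines missing from the map get a not-found error.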
  1267  type mockDistributionGroupFinder struct {
  1268  	groups map[names.MachineTag][]string
  1269  }
  1270  
  1271  func (mock *mockDistributionGroupFinder) DistributionGroupByMachineId(
  1272  	tags ...names.MachineTag,
  1273  ) ([]apiprovisioner.DistributionGroupResult, error) {
  1274  	result := make([]apiprovisioner.DistributionGroupResult, len(tags))
  1275  	if len(mock.groups) == 0 {
  1276  		for i := range tags {
  1277  			result[i] = apiprovisioner.DistributionGroupResult{MachineIds: []string{}}
  1278  		}
  1279  	} else {
  1280  		for i, tag := range tags {
  1281  			if dg, ok := mock.groups[tag]; ok {
  1282  				result[i] = apiprovisioner.DistributionGroupResult{MachineIds: dg}
  1283  			} else {
  1284  				result[i] = apiprovisioner.DistributionGroupResult{
  1285  					MachineIds: []string{}, Err: &params.Error{Code: params.CodeNotFound, Message: "Fail"}}
  1286  			}
  1287  		}
  1288  	}
  1289  	return result, nil
  1290  }
  1291  
  1292  func (s *ProvisionerSuite) TestMachineErrorsRetainInstances(c *gc.C) {
  1293  	task := s.newProvisionerTask(
  1294  		c,
  1295  		config.HarvestAll,
  1296  		s.Environ,
  1297  		s.provisioner,
  1298  		&mockDistributionGroupFinder{},
  1299  		mockToolsFinder{},
  1300  	)
  1301  	defer workertest.CleanKill(c, task)
  1302  
  1303  	// create a machine
  1304  	m0, err := s.addMachine()
  1305  	c.Assert(err, jc.ErrorIsNil)
  1306  	s.checkStartInstance(c, m0)
  1307  
  1308  	// create an instance out of band
  1309  	s.startUnknownInstance(c, "999")
  1310  
  1311  	// start the provisioner and ensure it doesn't kill any
  1312  	// instances if there are errors getting machines.
  1313  	task = s.newProvisionerTask(
  1314  		c,
  1315  		config.HarvestAll,
  1316  		s.Environ,
  1317  		&mockTaskAPI{},
  1318  		&mockDistributionGroupFinder{},
  1319  		&mockToolsFinder{},
  1320  	)
  1321  	defer func() {
  1322  		err := worker.Stop(task)
  1323  		c.Assert(err, gc.ErrorMatches, ".*getting machine.*")
  1324  	}()
  1325  	s.checkNoOperations(c)
  1326  }
  1327  
  1328  func (s *ProvisionerSuite) TestEnvironProvisionerObservesConfigChanges(c *gc.C) {
  1329  	p := s.newEnvironProvisioner(c)
  1330  	defer workertest.CleanKill(c, p)
  1331  	s.assertProvisionerObservesConfigChanges(c, p)
  1332  }
  1333  
  1334  func (s *ProvisionerSuite) TestEnvironProvisionerObservesConfigChangesWorkerCount(c *gc.C) {
  1335  	p := s.newEnvironProvisioner(c)
  1336  	defer workertest.CleanKill(c, p)
  1337  	s.assertProvisionerObservesConfigChangesWorkerCount(c, p, false)
  1338  }
  1339  
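        // newProvisionerTask builds a provisioner task for the given broker, API and
        // finders with a zero-delay, zero-retry strategy.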
  1340  func (s *ProvisionerSuite) newProvisionerTask(
  1341  	c *gc.C,
  1342  	harvestingMethod config.HarvestMode,
  1343  	broker environs.InstanceBroker,
  1344  	taskAPI provisioner.TaskAPI,
  1345  	distributionGroupFinder provisioner.DistributionGroupFinder,
  1346  	toolsFinder provisioner.ToolsFinder,
  1347  ) provisioner.ProvisionerTask {
  1348  
  1349  	retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 0)
  1350  
  1351  	return s.newProvisionerTaskWithRetryStrategy(c, harvestingMethod, broker,
  1352  		taskAPI, distributionGroupFinder, toolsFinder, retryStrategy)
  1353  }
  1354  
  1355  func (s *ProvisionerSuite) newProvisionerTaskWithRetryStrategy(
  1356  	c *gc.C,
  1357  	harvestingMethod config.HarvestMode,
  1358  	broker environs.InstanceBroker,
  1359  	taskAPI provisioner.TaskAPI,
  1360  	distributionGroupFinder provisioner.DistributionGroupFinder,
  1361  	toolsFinder provisioner.ToolsFinder,
  1362  	retryStrategy provisioner.RetryStrategy,
  1363  ) provisioner.ProvisionerTask {
  1364  
  1365  	machineWatcher, err := s.provisioner.WatchModelMachines()
  1366  	c.Assert(err, jc.ErrorIsNil)
  1367  	retryWatcher, err := s.provisioner.WatchMachineErrorRetry()
  1368  	c.Assert(err, jc.ErrorIsNil)
  1369  	auth, err := authentication.NewAPIAuthenticator(s.provisioner)
  1370  	c.Assert(err, jc.ErrorIsNil)
  1371  
  1372  	w, err := provisioner.NewProvisionerTask(provisioner.TaskConfig{
  1373  		ControllerUUID:             s.ControllerConfig.ControllerUUID(),
  1374  		HostTag:                    names.NewMachineTag("0"),
  1375  		Logger:                     loggo.GetLogger("test"),
  1376  		HarvestMode:                harvestingMethod,
  1377  		TaskAPI:                    taskAPI,
  1378  		DistributionGroupFinder:    distributionGroupFinder,
  1379  		ToolsFinder:                toolsFinder,
  1380  		MachineWatcher:             machineWatcher,
  1381  		RetryWatcher:               retryWatcher,
  1382  		Broker:                     broker,
  1383  		Auth:                       auth,
  1384  		ImageStream:                imagemetadata.ReleasedStream,
  1385  		RetryStartInstanceStrategy: retryStrategy,
  1386  		CloudCallContextFunc:       func(_ stdcontext.Context) context.ProviderCallContext { return s.callCtx },
  1387  		NumProvisionWorkers:        numProvisionWorkersForTesting,
  1388  	})
  1389  	c.Assert(err, jc.ErrorIsNil)
  1390  	return w
  1391  }
  1392  
  1393  func (s *ProvisionerSuite) TestHarvestNoneReapsNothing(c *gc.C) {
  1394  
  1395  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1396  	defer workertest.CleanKill(c, task)
  1397  	task.SetHarvestMode(config.HarvestNone)
  1398  
  1399  	// Create a machine and an unknown instance.
  1400  	m0, err := s.addMachine()
  1401  	c.Assert(err, jc.ErrorIsNil)
  1402  	s.checkStartInstance(c, m0)
  1403  	s.startUnknownInstance(c, "999")
  1404  
  1405  	// Mark the first machine as dead.
  1406  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1407  
  1408  	// Ensure we're doing nothing.
  1409  	s.checkNoOperations(c)
  1410  }
  1411  
  1412  func (s *ProvisionerSuite) TestHarvestUnknownReapsOnlyUnknown(c *gc.C) {
  1413  	task := s.newProvisionerTask(c,
  1414  		config.HarvestDestroyed,
  1415  		s.Environ,
  1416  		s.provisioner,
  1417  		&mockDistributionGroupFinder{},
  1418  		mockToolsFinder{},
  1419  	)
  1420  	defer workertest.CleanKill(c, task)
  1421  	task.SetHarvestMode(config.HarvestUnknown)
  1422  
  1423  	// Create a machine and an unknown instance.
  1424  	m0, err := s.addMachine()
  1425  	c.Assert(err, jc.ErrorIsNil)
  1426  	i0 := s.checkStartInstance(c, m0)
  1427  	i1 := s.startUnknownInstance(c, "999")
  1428  
  1429  	// Mark the first machine as dead.
  1430  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1431  
  1432  	// When only harvesting unknown machines, only one of the machines
  1433  	// is stopped.
  1434  	s.checkStopSomeInstances(c, []instances.Instance{i1}, []instances.Instance{i0})
  1435  	s.waitForRemovalMark(c, m0)
  1436  }
  1437  
  1438  func (s *ProvisionerSuite) TestHarvestDestroyedReapsOnlyDestroyed(c *gc.C) {
  1439  
  1440  	task := s.newProvisionerTask(
  1441  		c,
  1442  		config.HarvestDestroyed,
  1443  		s.Environ,
  1444  		s.provisioner,
  1445  		&mockDistributionGroupFinder{},
  1446  		mockToolsFinder{},
  1447  	)
  1448  	defer workertest.CleanKill(c, task)
  1449  
  1450  	// Create a machine and an unknown instance.
  1451  	m0, err := s.addMachine()
  1452  	c.Assert(err, jc.ErrorIsNil)
  1453  	i0 := s.checkStartInstance(c, m0)
  1454  	i1 := s.startUnknownInstance(c, "999")
  1455  
  1456  	// Mark the first machine as dead.
  1457  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1458  
  1459  	// When only harvesting destroyed machines, only one of the
  1460  	// machines is stopped.
  1461  	s.checkStopSomeInstances(c, []instances.Instance{i0}, []instances.Instance{i1})
  1462  	s.waitForRemovalMark(c, m0)
  1463  }
  1464  
  1465  func (s *ProvisionerSuite) TestHarvestAllReapsAllTheThings(c *gc.C) {
  1466  
  1467  	task := s.newProvisionerTask(c,
  1468  		config.HarvestDestroyed,
  1469  		s.Environ,
  1470  		s.provisioner,
  1471  		&mockDistributionGroupFinder{},
  1472  		mockToolsFinder{},
  1473  	)
  1474  	defer workertest.CleanKill(c, task)
  1475  	task.SetHarvestMode(config.HarvestAll)
  1476  
  1477  	// Create a machine and an unknown instance.
  1478  	m0, err := s.addMachine()
  1479  	c.Assert(err, jc.ErrorIsNil)
  1480  	i0 := s.checkStartInstance(c, m0)
  1481  	i1 := s.startUnknownInstance(c, "999")
  1482  
  1483  	// Mark the first machine as dead.
  1484  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1485  
  1486  	// Everything must die!
  1487  	s.checkStopSomeInstances(c, []instances.Instance{i0, i1}, []instances.Instance{})
  1488  	s.waitForRemovalMark(c, m0)
  1489  }
  1490  
  1491  func (s *ProvisionerSuite) TestProvisionerObservesMachineJobs(c *gc.C) {
  1492  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1493  	broker := &mockBroker{Environ: s.Environ, retryCount: make(map[string]int),
  1494  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1495  			"3": {whenSucceed: 2, err: fmt.Errorf("error: some error")},
  1496  			"4": {whenSucceed: 2, err: fmt.Errorf("error: some error")},
  1497  		},
  1498  	}
  1499  	task := s.newProvisionerTask(c, config.HarvestAll, broker, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1500  	defer workertest.CleanKill(c, task)
  1501  
  1502  	added := s.enableHA(c, 3)
  1503  	c.Assert(added, gc.HasLen, 2)
  1504  	s.checkStartInstances(c, added)
  1505  }
  1506  
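        // assertAvailabilityZoneMachines checks that every provisioned machine
        // appears in exactly one zone's MachineIds, and that every machine in
        // failedAZMachines is recorded as failed in at least one zone.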
  1507  func assertAvailabilityZoneMachines(c *gc.C,
  1508  	machines []*state.Machine,
  1509  	failedAZMachines []*state.Machine,
  1510  	obtained []provisioner.AvailabilityZoneMachine,
  1511  ) {
  1512  	if len(machines) > 0 {
  1513  		// Does each machine's zone match exactly one AvailabilityZoneMachine entry?
  1514  		for _, m := range machines {
  1515  			zone, err := m.AvailabilityZone()
  1516  			c.Assert(err, jc.ErrorIsNil)
  1517  			found := 0
  1518  			for _, zoneInfo := range obtained {
  1519  				if zone == zoneInfo.ZoneName {
  1520  					c.Assert(zoneInfo.MachineIds.Contains(m.Id()), gc.Equals, true, gc.Commentf(
  1521  						"machine %q not found in list for zone %q; zone list: %#v", m.Id(), zone, zoneInfo,
  1522  					))
  1523  					found += 1
  1524  				}
  1525  			}
  1526  			c.Assert(found, gc.Equals, 1)
  1527  		}
  1528  	}
  1529  	if len(failedAZMachines) > 0 {
  1530  		for _, m := range failedAZMachines {
  1531  			// Is the failed machine listed as failed in at least one zone?
  1532  			failedZones := 0
  1533  			for _, zoneInfo := range obtained {
  1534  				if zoneInfo.FailedMachineIds.Contains(m.Id()) {
  1535  					failedZones += 1
  1536  				}
  1537  			}
  1538  			c.Assert(failedZones, jc.GreaterThan, 0)
  1539  		}
  1540  	}
  1541  }
  1542  
  1543  // assertAvailabilityZoneMachinesDistribution checks that the machines
  1544  // have been distributed across the zones, with at most maxDelta between
  1545  // the largest and smallest per-zone machine counts. This check is only
  1546  // valid when the test case provisions every machine without error.
  1547  //
  1548  // Which machine ends up in which zone depends on the order in which the
  1549  // machines are provisioned, so the assignment is almost impossible to predict.
  1550  func assertAvailabilityZoneMachinesDistribution(c *gc.C, obtained []provisioner.AvailabilityZoneMachine, maxDelta int) {
  1551  	// Are the machines evenly distributed? No zone should have more than
  1552  	// maxDelta machines beyond the least populated zone.
  1553  	min, max := 1, 0
  1554  	counts := make(map[string]int)
  1555  	for _, zone := range obtained {
  1556  		count := zone.MachineIds.Size()
  1557  		counts[zone.ZoneName] = count
  1558  		if min > count {
  1559  			min = count
  1560  		}
  1561  		if max < count {
  1562  			max = count
  1563  		}
  1564  	}
  1565  	c.Assert(max-min, jc.LessThan, maxDelta+1, gc.Commentf("min = %d, max = %d, counts = %v", min, max, counts))
  1566  }
  1567  
  1568  // checkAvailabilityZoneMachinesDistributionGroups checks whether the given
  1569  // distribution groups have been honoured, returning an error if they have not.
  1570  func checkAvailabilityZoneMachinesDistributionGroups(c *gc.C, groups map[names.MachineTag][]string, obtained []provisioner.AvailabilityZoneMachine) error {
  1571  	// The machines in a distribution group, together with the machine
  1572  	// whose distribution group it is, should not share an AZ unless there
  1573  	// are more machines in the set than there are AZs. If there are more
  1574  	// machines in the set than AZs, each AZ should contain the number of
  1575  	// machines in the set divided by the number of AZs, or 1 less than
  1576  	// that number.
  1577  	//
  1578  	// e.g. if there are 5 machines in the set and 3 AZs, each AZ should
  1579  	// have 1 or 2 machines from the set in it.
  1580  	obtainedZoneCount := len(obtained)
  1581  	for tag, group := range groups {
  1582  		maxMachineInZoneCount := 1
  1583  		applicationMachinesCount := len(group) + 1
  1584  		if applicationMachinesCount > obtainedZoneCount {
  1585  			maxMachineInZoneCount = applicationMachinesCount / obtainedZoneCount
  1586  		}
  1587  		for _, z := range obtained {
  1588  			if z.MachineIds.Contains(tag.Id()) {
  1589  				intersection := z.MachineIds.Intersection(set.NewStrings(group...))
  1590  				machineCount := intersection.Size() + 1
  1591  				// For appropriate machine distribution, the number of machines in the
  1592  				// zone should be the same as maxMachineInZoneCount or 1 less.
  1593  				if machineCount == maxMachineInZoneCount || machineCount == maxMachineInZoneCount-1 {
  1594  					break
  1595  				}
  1596  				return errors.Errorf("%+v has too many of %s and %s", z.MachineIds, tag.Id(), group)
  1597  			}
  1598  		}
  1599  	}
  1600  	return nil
  1601  }
  1602  
  1603  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachines(c *gc.C) {
  1604  	// Per the dummy provider, there will be 3 available availability zones.
  1605  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1606  	defer workertest.CleanKill(c, task)
  1607  
  1608  	machines, err := s.addMachines(4)
  1609  	c.Assert(err, jc.ErrorIsNil)
  1610  	s.checkStartInstances(c, machines)
  1611  
  1612  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1613  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1614  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1)
  1615  }
  1616  
  1617  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailures(c *gc.C) {
  1618  	// Per the dummy provider, there will be 3 available availability zones.
  1619  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1620  	e := &mockBroker{
  1621  		Environ:    s.Environ,
  1622  		retryCount: make(map[string]int),
  1623  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1624  			"2": {whenSucceed: 1, err: errors.New("zing")},
  1625  		},
  1626  	}
  1627  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2)
  1628  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1629  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1630  	defer workertest.CleanKill(c, task)
  1631  
  1632  	machines, err := s.addMachines(4)
  1633  	c.Assert(err, jc.ErrorIsNil)
  1634  	s.checkStartInstances(c, machines)
  1635  
  1636  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1637  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1638  
  1639  	// The reason maxDelta is 2 here is because in certain failure cases this
  1640  	// may start two machines on each of two zones, and none on the other (if
  1641  	// the failing machine is started second or third, and the subsequent
  1642  	// machines are started before markMachineFailedInAZ() is called). See
  1643  	// https://github.com/juju/juju/pull/12267 for more detail.
  1644  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 2)
  1645  }
  1646  
  1647  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesWithDG(c *gc.C) {
  1648  	// Per the dummy provider, there will be 3 available availability zones.
  1649  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1650  	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
  1651  		names.NewMachineTag("1"): {"3", "4"},
  1652  		names.NewMachineTag("2"): {},
  1653  		names.NewMachineTag("3"): {"1", "4"},
  1654  		names.NewMachineTag("4"): {"1", "3"},
  1655  		names.NewMachineTag("5"): {},
  1656  	}}
  1657  
  1658  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
  1659  	defer workertest.CleanKill(c, task)
  1660  
  1661  	machines, err := s.addMachines(5)
  1662  	c.Assert(err, jc.ErrorIsNil)
  1663  	s.checkStartInstances(c, machines)
  1664  
  1665  	// Machines 1, 3 and 4 share a distribution group, so they should be in different zones.
  1666  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1667  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1668  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1669  }
  1670  
  1671  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailuresWithDG(c *gc.C) {
  1672  	// Per the dummy provider, there will be 3 available availability zones.
  1673  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1674  	e := &mockBroker{
  1675  		Environ:    s.Environ,
  1676  		retryCount: make(map[string]int),
  1677  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1678  			"2": {whenSucceed: 1, err: errors.New("zing")},
  1679  		},
  1680  	}
  1681  	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
  1682  		names.NewMachineTag("1"): {"4", "5"},
  1683  		names.NewMachineTag("2"): {"3"},
  1684  		names.NewMachineTag("3"): {"2"},
  1685  		names.NewMachineTag("4"): {"1", "5"},
  1686  		names.NewMachineTag("5"): {"1", "4"},
  1687  	}}
  1688  	retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 2)
  1689  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1690  		e, s.provisioner, dgFinder, mockToolsFinder{}, retryStrategy)
  1691  	defer workertest.CleanKill(c, task)
  1692  
  1693  	machines, err := s.addMachines(5)
  1694  	c.Assert(err, jc.ErrorIsNil)
  1695  	s.checkStartInstances(c, machines)
  1696  
  1697  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1698  	assertAvailabilityZoneMachines(c, machines, []*state.Machine{machines[1]}, availabilityZoneMachines)
  1699  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1700  }
  1701  
  1702  func (s *ProvisionerSuite) TestProvisioningMachinesSingleMachineDGFailure(c *gc.C) {
  1703  	// If a single machine fails getting the distribution group,
  1704  	// ensure the other machines are still provisioned.
  1705  	dgFinder := &mockDistributionGroupFinder{
  1706  		groups: map[names.MachineTag][]string{
  1707  			names.NewMachineTag("2"): {"3", "5"},
  1708  			names.NewMachineTag("3"): {"2", "5"},
  1709  			names.NewMachineTag("4"): {"1"},
  1710  			names.NewMachineTag("5"): {"2", "3"},
  1711  		},
  1712  	}
  1713  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
  1714  	defer workertest.CleanKill(c, task)
  1715  
  1716  	machines, err := s.addMachines(5)
  1717  	c.Assert(err, jc.ErrorIsNil)
  1718  
  1719  	s.checkStartInstances(c, machines[1:])
  1720  	_, err = machines[0].InstanceId()
  1721  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1722  
  1723  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1724  	assertAvailabilityZoneMachines(c, machines[1:], nil, availabilityZoneMachines)
  1725  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1726  }
  1727  
  1728  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStopMachines(c *gc.C) {
  1729  	// Per the dummy provider, there will be 3 available availability zones.
  1730  	task := s.newProvisionerTask(
  1731  		c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1732  	defer workertest.CleanKill(c, task)
  1733  
  1734  	machines, err := s.addMachines(4)
  1735  	c.Assert(err, jc.ErrorIsNil)
  1736  	s.checkStartInstances(c, machines)
  1737  
  1738  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1739  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1740  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1)
  1741  
  1742  	c.Assert(machines[0].EnsureDead(), gc.IsNil)
  1743  	s.waitForRemovalMark(c, machines[0])
  1744  
  1745  	assertAvailabilityZoneMachines(c, machines[1:], nil, provisioner.GetCopyAvailabilityZoneMachines(task))
  1746  }
  1747  
  1748  func (s *ProvisionerSuite) TestProvisioningMachinesFailMachine(c *gc.C) {
  1749  	e := &mockBroker{
  1750  		Environ:    s.Environ,
  1751  		retryCount: make(map[string]int),
  1752  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1753  			"2": {whenSucceed: 2, err: errors.New("fail provisioning for TestProvisioningMachinesFailMachine")},
  1754  		},
  1755  	}
  1756  	task := s.newProvisionerTask(c, config.HarvestDestroyed,
  1757  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1758  	defer workertest.CleanKill(c, task)
  1759  
  1760  	machines, err := s.addMachines(4)
  1761  	c.Assert(err, jc.ErrorIsNil)
  1762  	mFail := machines[1]
  1763  	machines = append(machines[:1], machines[2:]...)
  1764  	s.checkStartInstances(c, machines)
  1765  	_, err = mFail.InstanceId()
  1766  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1767  
  1768  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1769  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1770  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines, 1)
  1771  }
  1772  
  1773  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesRestartTask(c *gc.C) {
  1774  	// Per the dummy provider, there will be 3 available availability zones.
  1775  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1776  	defer workertest.CleanKill(c, task)
  1777  
  1778  	machines, err := s.addMachines(4)
  1779  	c.Assert(err, jc.ErrorIsNil)
  1780  	s.checkStartInstances(c, machines)
  1781  
  1782  	availabilityZoneMachinesBefore := provisioner.GetCopyAvailabilityZoneMachines(task)
  1783  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachinesBefore)
  1784  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachinesBefore, 1)
  1785  
  1786  	workertest.CleanKill(c, task)
  1787  	newTask := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1788  	defer workertest.CleanKill(c, newTask)
  1789  
  1790  	// Verify provisionerTask.availabilityZoneMachines is the same before and
  1791  	// after the provisionerTask is restarted.
  1792  	availabilityZoneMachinesAfter := provisioner.GetCopyAvailabilityZoneMachines(newTask)
  1793  	c.Assert(availabilityZoneMachinesBefore, jc.DeepEquals, availabilityZoneMachinesAfter)
  1794  }
  1795  
  1796  func (s *ProvisionerSuite) TestProvisioningMachinesClearAZFailures(c *gc.C) {
  1797  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1798  	e := &mockBroker{
  1799  		Environ:    s.Environ,
  1800  		retryCount: make(map[string]int),
  1801  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1802  			"1": {whenSucceed: 3, err: errors.New("zing")},
  1803  		},
  1804  	}
  1805  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 4)
  1806  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1807  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1808  	defer workertest.CleanKill(c, task)
  1809  
  1810  	machine, err := s.addMachine()
  1811  	c.Assert(err, jc.ErrorIsNil)
  1812  	s.checkStartInstance(c, machine)
  1813  	count := e.getRetryCount(machine.Id())
  1814  	c.Assert(count, gc.Equals, 3)
  1815  	machineAZ, err := machine.AvailabilityZone()
  1816  	c.Assert(err, jc.ErrorIsNil)
  1817  	// Zones 3 and 4 have the same machine count; one is picked at random.
  1818  	c.Assert(set.NewStrings("zone3", "zone4").Contains(machineAZ), jc.IsTrue)
  1819  }
  1820  
  1821  func (s *ProvisionerSuite) TestProvisioningMachinesDerivedAZ(c *gc.C) {
  1822  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1823  	e := &mockBroker{
  1824  		Environ:    s.Environ,
  1825  		retryCount: make(map[string]int),
  1826  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1827  			"2": {whenSucceed: 3, err: errors.New("zing")},
  1828  			"3": {whenSucceed: 1, err: errors.New("zing")},
  1829  			"5": {whenSucceed: 1, err: environs.ZoneIndependentError(errors.New("arf"))},
  1830  		},
  1831  		derivedAZ: map[string][]string{
  1832  			"1": {"fail-zone"},
  1833  			"2": {"zone4"},
  1834  			"3": {"zone1", "zone4"},
  1835  			"4": {"zone1"},
  1836  			"5": {"zone3"},
  1837  		},
  1838  	}
  1839  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2)
  1840  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1841  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1842  	defer workertest.CleanKill(c, task)
  1843  
  1844  	machines, err := s.addMachines(5)
  1845  	c.Assert(err, jc.ErrorIsNil)
  1846  	mFail := machines[:2]
  1847  	mSucceed := machines[2:]
  1848  
  1849  	s.checkStartInstances(c, mSucceed)
  1850  	c.Assert(e.getRetryCount(mSucceed[0].Id()), gc.Equals, 1)
  1851  	c.Assert(e.getRetryCount(mSucceed[2].Id()), gc.Equals, 1)
  1852  
  1853  	// This synchronisation addresses a potential race condition.
  1854  	// It can happen that, by the time checkStartInstances returns
  1855  	// successfully, the machine(s) arranged to fail provisioning have not
  1856  	// yet been retried the expected number of times, so we wait.
  1857  	id := mFail[1].Id()
  1858  	timeout := time.After(coretesting.LongWait)
  1859  	for e.getRetryCount(id) < 3 {
  1860  		select {
  1861  		case <-timeout:
  1862  			c.Fatalf("Failed provision of %q did not retry 3 times", id)
  1863  		default:
  1864  		}
  1865  	}
  1866  
  1867  	_, err = mFail[0].InstanceId()
  1868  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1869  	_, err = mFail[1].InstanceId()
  1870  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1871  
  1872  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1873  	assertAvailabilityZoneMachines(c, mSucceed, nil, availabilityZoneMachines)
  1874  
  1875  	for i, zone := range []string{"zone1", "zone3"} {
  1876  		machineAZ, err := mSucceed[i+1].AvailabilityZone()
  1877  		c.Assert(err, jc.ErrorIsNil)
  1878  		c.Assert(machineAZ, gc.Equals, zone)
  1879  	}
  1880  }
  1881  
  1882  func (s *ProvisionerSuite) TestProvisioningMachinesNoZonedEnviron(c *gc.C) {
  1883  	// Make sure the provisioner still works for providers which do not
  1884  	// implement the ZonedEnviron interface.
  1885  	noZonedEnvironBroker := &mockNoZonedEnvironBroker{Environ: s.Environ}
  1886  	task := s.newProvisionerTask(c,
  1887  		config.HarvestDestroyed,
  1888  		noZonedEnvironBroker,
  1889  		s.provisioner,
  1890  		&mockDistributionGroupFinder{},
  1891  		mockToolsFinder{})
  1892  	defer workertest.CleanKill(c, task)
  1893  
  1894  	machines, err := s.addMachines(4)
  1895  	c.Assert(err, jc.ErrorIsNil)
  1896  	s.checkStartInstances(c, machines)
  1897  
  1898  	expected := provisioner.GetCopyAvailabilityZoneMachines(task)
  1899  	c.Assert(expected, gc.HasLen, 0)
  1900  }
  1901  
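        // mockNoZonedEnvironBroker embeds environs.Environ only, so it does not
        // satisfy providercommon.ZonedEnviron; the provisioner must therefore
        // work without availability zone information.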
  1902  type mockNoZonedEnvironBroker struct {
  1903  	environs.Environ
  1904  }
  1905  
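        // StartInstance delegates directly to the wrapped Environ.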
  1906  func (b *mockNoZonedEnvironBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
  1907  	return b.Environ.StartInstance(ctx, args)
  1908  }
  1909  
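        // mockBroker wraps the dummy Environ to inject StartInstance failures
        // for selected machine IDs and to record how many times each machine
        // has been retried. Zone queries delegate to the wrapped Environ unless
        // overridden via derivedAZ.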
  1910  type mockBroker struct {
  1911  	environs.Environ
  1912  
  1913  	mu                       sync.Mutex
  1914  	retryCount               map[string]int
  1915  	startInstanceFailureInfo map[string]mockBrokerFailures
  1916  	derivedAZ                map[string][]string
  1917  }
  1918  
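        // mockBrokerFailures configures StartInstance to return err until the
        // machine has failed whenSucceed times, after which provisioning is
        // delegated to the real Environ.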
  1919  type mockBrokerFailures struct {
  1920  	err         error
  1921  	whenSucceed int
  1922  }
  1923  
  1924  func (b *mockBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
  1925  	// All machines are provisioned successfully the first time unless
  1926  	// startInstanceFailureInfo is configured for their machine ID.
  1928  	id := args.InstanceConfig.MachineId
  1929  	b.mu.Lock()
  1930  	defer b.mu.Unlock()
  1931  	retries := b.retryCount[id]
  1932  	whenSucceed := 0
  1933  	var returnError error
  1934  	if failureInfo, ok := b.startInstanceFailureInfo[id]; ok {
  1935  		whenSucceed = failureInfo.whenSucceed
  1936  		returnError = failureInfo.err
  1937  	}
  1938  	if retries == whenSucceed {
  1939  		return b.Environ.StartInstance(ctx, args)
  1940  	}
  1941  	b.retryCount[id] = retries + 1
  1942  	return nil, returnError
  1944  }
  1945  
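        // getRetryCount reports how many failed StartInstance attempts have
        // been recorded for the given machine ID.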
  1946  func (b *mockBroker) getRetryCount(id string) int {
  1947  	b.mu.Lock()
  1948  	retries := b.retryCount[id]
  1949  	b.mu.Unlock()
  1950  	return retries
  1951  }
  1952  
  1953  // These methods implement providercommon.ZonedEnviron, which is required
  1954  // by provisionerTask.populateAvailabilityZoneMachines when mockBroker is used.
  1955  
  1956  func (b *mockBroker) AvailabilityZones(ctx context.ProviderCallContext) (corenetwork.AvailabilityZones, error) {
  1957  	return b.Environ.(providercommon.ZonedEnviron).AvailabilityZones(ctx)
  1958  }
  1959  
  1960  func (b *mockBroker) InstanceAvailabilityZoneNames(ctx context.ProviderCallContext, ids []instance.Id) (map[instance.Id]string, error) {
  1961  	return b.Environ.(providercommon.ZonedEnviron).InstanceAvailabilityZoneNames(ctx, ids)
  1962  }
  1963  
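        // DeriveAvailabilityZones returns any zones configured in derivedAZ for
        // the machine, falling back to the wrapped ZonedEnviron otherwise.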
  1964  func (b *mockBroker) DeriveAvailabilityZones(ctx context.ProviderCallContext, args environs.StartInstanceParams) ([]string, error) {
  1965  	id := args.InstanceConfig.MachineId
  1966  	b.mu.Lock()
  1967  	defer b.mu.Unlock()
  1968  	if derivedAZ, ok := b.derivedAZ[id]; ok {
  1969  		return derivedAZ, nil
  1970  	}
  1971  	return b.Environ.(providercommon.ZonedEnviron).DeriveAvailabilityZones(ctx, args)
  1972  }
  1973  
  1974  type mockToolsFinder struct {
  1975  }
  1976  
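        // FindTools fabricates a single tools entry for the requested version,
        // OS and architecture; it errors if no architecture is supplied.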
  1977  func (f mockToolsFinder) FindTools(number version.Number, os string, a string) (coretools.List, error) {
  1978  	v, err := version.ParseBinary(fmt.Sprintf("%s-%s-%s", number, os, arch.HostArch()))
  1979  	if err != nil {
  1980  		return nil, err
  1981  	}
  1982  	if a == "" {
  1983  		return nil, errors.New("missing arch")
  1984  	}
  1985  	v.Arch = a
  1986  	return coretools.List{&coretools.Tools{Version: v}}, nil
  1987  }