github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/provisioner/provisioner_test.go

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner_test
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/juju/collections/set"
    13  	"github.com/juju/errors"
    14  	"github.com/juju/os/series"
    15  	jc "github.com/juju/testing/checkers"
    16  	"github.com/juju/utils"
    17  	"github.com/juju/utils/arch"
    18  	"github.com/juju/version"
    19  	gc "gopkg.in/check.v1"
    20  	"gopkg.in/juju/names.v2"
    21  	"gopkg.in/juju/worker.v1"
    22  	"gopkg.in/juju/worker.v1/workertest"
    23  
    24  	"github.com/juju/juju/agent"
    25  	"github.com/juju/juju/api"
    26  	apiprovisioner "github.com/juju/juju/api/provisioner"
    27  	apiserverprovisioner "github.com/juju/juju/apiserver/facades/agent/provisioner"
    28  	"github.com/juju/juju/apiserver/params"
    29  	"github.com/juju/juju/controller/authentication"
    30  	"github.com/juju/juju/core/constraints"
    31  	"github.com/juju/juju/core/instance"
    32  	"github.com/juju/juju/core/status"
    33  	"github.com/juju/juju/environs"
    34  	"github.com/juju/juju/environs/config"
    35  	"github.com/juju/juju/environs/context"
    36  	"github.com/juju/juju/environs/filestorage"
    37  	"github.com/juju/juju/environs/imagemetadata"
    38  	imagetesting "github.com/juju/juju/environs/imagemetadata/testing"
    39  	"github.com/juju/juju/environs/instances"
    40  	envtesting "github.com/juju/juju/environs/testing"
    41  	"github.com/juju/juju/environs/tools"
    42  	"github.com/juju/juju/juju/testing"
    43  	supportedversion "github.com/juju/juju/juju/version"
    44  	"github.com/juju/juju/network"
    45  	providercommon "github.com/juju/juju/provider/common"
    46  	"github.com/juju/juju/provider/dummy"
    47  	"github.com/juju/juju/state"
    48  	"github.com/juju/juju/state/cloudimagemetadata"
    49  	"github.com/juju/juju/state/multiwatcher"
    50  	"github.com/juju/juju/storage"
    51  	"github.com/juju/juju/storage/poolmanager"
    52  	coretesting "github.com/juju/juju/testing"
    53  	coretools "github.com/juju/juju/tools"
    54  	jujuversion "github.com/juju/juju/version"
    55  	"github.com/juju/juju/worker/provisioner"
    56  )
    57  
    58  type CommonProvisionerSuite struct {
    59  	testing.JujuConnSuite
    60  	op  <-chan dummy.Operation
    61  	cfg *config.Config
    62  	// defaultConstraints are used when adding a machine and then later in test assertions.
    63  	defaultConstraints constraints.Value
    64  
    65  	st          api.Connection
    66  	provisioner *apiprovisioner.State
    67  	callCtx     context.ProviderCallContext
    68  }
    69  
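         // assertProvisionerObservesConfigChanges switches the model's harvest mode
         // to HarvestAll and waits for the provisioner to observe the new config.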
    70  func (s *CommonProvisionerSuite) assertProvisionerObservesConfigChanges(c *gc.C, p provisioner.Provisioner) {
    71  	// Inject our observer into the provisioner
    72  	cfgObserver := make(chan *config.Config)
    73  	provisioner.SetObserver(p, cfgObserver)
    74  
    75  	// Switch to reaping on All machines.
    76  	attrs := map[string]interface{}{
    77  		config.ProvisionerHarvestModeKey: config.HarvestAll.String(),
    78  	}
    79  	err := s.Model.UpdateModelConfig(attrs, nil)
    80  	c.Assert(err, jc.ErrorIsNil)
    81  
    82  	s.BackingState.StartSync()
    83  
    84  	// Wait for the PA to load the new configuration. We wait for the change we expect
    85  	// like this because sometimes we pick up the initial harvest config (destroyed)
    86  	// rather than the one we change to (all).
    87  	var received []string
    88  	timeout := time.After(coretesting.LongWait)
    89  	for {
    90  		select {
    91  		case newCfg := <-cfgObserver:
    92  			if newCfg.ProvisionerHarvestMode().String() == config.HarvestAll.String() {
    93  				return
    94  			}
    95  			received = append(received, newCfg.ProvisionerHarvestMode().String())
    96  		case <-time.After(coretesting.ShortWait):
    97  			s.BackingState.StartSync()
    98  		case <-timeout:
    99  			if len(received) == 0 {
   100  				c.Fatalf("PA did not action config change")
   101  			} else {
   102  				c.Fatalf("timed out waiting for config to change to '%s', received %+v",
   103  					config.HarvestAll.String(), received)
   104  			}
   105  		}
   106  	}
   107  }
   108  
   109  type ProvisionerSuite struct {
   110  	CommonProvisionerSuite
   111  }
   112  
   113  var _ = gc.Suite(&ProvisionerSuite{})
   114  
   115  func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) {
   116  	s.JujuConnSuite.SetUpSuite(c)
   117  	s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cores=1 root-disk=8G")
   118  }
   119  
   120  func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) {
   121  	s.JujuConnSuite.SetUpTest(c)
   122  
   123  	// We do not want to pull published image metadata for tests...
   124  	imagetesting.PatchOfficialDataSources(&s.CleanupSuite, "")
   125  	// We want an image to start test instances
   126  	err := s.State.CloudImageMetadataStorage.SaveMetadata([]cloudimagemetadata.Metadata{{
   127  		MetadataAttributes: cloudimagemetadata.MetadataAttributes{
   128  			Region:          "region",
   129  			Series:          "trusty",
   130  			Arch:            "amd64",
   131  			VirtType:        "",
   132  			RootStorageType: "",
   133  			Source:          "test",
   134  			Stream:          "released",
   135  		},
   136  		Priority: 10,
   137  		ImageId:  "-999",
   138  	}})
   139  	c.Assert(err, jc.ErrorIsNil)
   140  
   141  	// Create the operations channel with more than enough space
   142  	// for those tests that don't listen on it.
   143  	op := make(chan dummy.Operation, 500)
   144  	dummy.Listen(op)
   145  	s.op = op
   146  
   147  	cfg, err := s.Model.ModelConfig()
   148  	c.Assert(err, jc.ErrorIsNil)
   149  	s.cfg = cfg
   150  
   151  	s.callCtx = context.NewCloudCallContext()
   152  
   153  	// Create a machine for the dummy bootstrap instance,
   154  	// so the provisioner doesn't destroy it.
   155  	insts, err := s.Environ.Instances(s.callCtx, []instance.Id{dummy.BootstrapInstanceId})
   156  	c.Assert(err, jc.ErrorIsNil)
   157  	addrs, err := insts[0].Addresses(s.callCtx)
   158  	c.Assert(err, jc.ErrorIsNil)
   159  	machine, err := s.State.AddOneMachine(state.MachineTemplate{
   160  		Addresses:  addrs,
   161  		Series:     "quantal",
   162  		Nonce:      agent.BootstrapNonce,
   163  		InstanceId: dummy.BootstrapInstanceId,
   164  		Jobs:       []state.MachineJob{state.JobManageModel},
   165  	})
   166  	c.Assert(err, jc.ErrorIsNil)
   167  	c.Assert(machine.Id(), gc.Equals, "0")
   168  
   169  	current := version.Binary{
   170  		Number: jujuversion.Current,
   171  		Arch:   arch.HostArch(),
   172  		Series: series.MustHostSeries(),
   173  	}
   174  	err = machine.SetAgentVersion(current)
   175  	c.Assert(err, jc.ErrorIsNil)
   176  
   177  	password, err := utils.RandomPassword()
   178  	c.Assert(err, jc.ErrorIsNil)
   179  	err = machine.SetPassword(password)
   180  	c.Assert(err, jc.ErrorIsNil)
   181  
   182  	s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, agent.BootstrapNonce)
   183  	c.Assert(s.st, gc.NotNil)
   184  	c.Logf("API: login as %q successful", machine.Tag())
   185  	s.provisioner = apiprovisioner.NewState(s.st)
   186  	c.Assert(s.provisioner, gc.NotNil)
   187  
   188  }
   189  
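         // startUnknownInstance starts an instance directly in the environ, with no
         // corresponding machine in state, and waits for the StartInstance operation.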
   190  func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instances.Instance {
   191  	instance, _ := testing.AssertStartInstance(c, s.Environ, s.callCtx, s.ControllerConfig.ControllerUUID(), id)
   192  	select {
   193  	case o := <-s.op:
   194  		switch o := o.(type) {
   195  		case dummy.OpStartInstance:
   196  		default:
   197  			c.Fatalf("unexpected operation %#v", o)
   198  		}
   199  	case <-time.After(coretesting.LongWait):
   200  		c.Fatalf("timed out waiting for startinstance operation")
   201  	}
   202  	return instance
   203  }
   204  
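         // checkStartInstance waits for an instance to be started for the given
         // machine using the suite's default secret and constraints.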
   205  func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instances.Instance {
   206  	retVal := s.checkStartInstancesCustom(c, []*state.Machine{m}, "pork", s.defaultConstraints,
   207  		nil, nil, nil, nil, nil, true)
   208  	return retVal[m.Id()]
   209  }
   210  
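         // checkStartInstanceCustom is the single-machine form of
         // checkStartInstancesCustom.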
   211  func (s *CommonProvisionerSuite) checkStartInstanceCustom(
   212  	c *gc.C, m *state.Machine,
   213  	secret string, cons constraints.Value,
   214  	networkInfo []network.InterfaceInfo,
   215  	subnetsToZones map[network.Id][]string,
   216  	volumes []storage.Volume,
   217  	volumeAttachments []storage.VolumeAttachment,
   218  	checkPossibleTools coretools.List,
   219  	waitInstanceId bool,
   220  ) instances.Instance {
   221  	retVal := s.checkStartInstancesCustom(c, []*state.Machine{m},
   222  		secret, cons, networkInfo, subnetsToZones, volumes,
   223  		volumeAttachments, checkPossibleTools, waitInstanceId)
   224  	return retVal[m.Id()]
   225  }
   226  
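         // checkStartInstances waits for instances to be started for all the given
         // machines using the suite's default secret and constraints.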
   227  func (s *CommonProvisionerSuite) checkStartInstances(c *gc.C, machines []*state.Machine) map[string]instances.Instance {
   228  	return s.checkStartInstancesCustom(c, machines, "pork", s.defaultConstraints, nil, nil,
   229  		nil, nil, nil, true)
   230  }
   231  
    232  // checkStartInstancesCustom takes a slice of Machines and returns a
    233  // map of machine ids to the instances started for them.
   234  func (s *CommonProvisionerSuite) checkStartInstancesCustom(
   235  	c *gc.C, machines []*state.Machine,
   236  	secret string, cons constraints.Value,
   237  	networkInfo []network.InterfaceInfo,
   238  	subnetsToZones map[network.Id][]string,
   239  	volumes []storage.Volume,
   240  	volumeAttachments []storage.VolumeAttachment,
   241  	checkPossibleTools coretools.List,
   242  	waitInstanceId bool,
   243  ) (
   244  	returnInstances map[string]instances.Instance,
   245  ) {
   246  	s.BackingState.StartSync()
   247  	returnInstances = make(map[string]instances.Instance, len(machines))
   248  	found := 0
   249  	for {
   250  		select {
   251  		case o := <-s.op:
   252  			switch o := o.(type) {
   253  			case dummy.OpStartInstance:
   254  				inst := o.Instance
   255  
   256  				var m *state.Machine
   257  				for _, machine := range machines {
   258  					if machine.Id() == o.MachineId {
   259  						m = machine
   260  						found += 1
   261  						break
   262  					}
   263  				}
   264  				c.Assert(m, gc.NotNil)
   265  				if waitInstanceId {
   266  					s.waitInstanceId(c, m, inst.Id())
   267  				}
   268  
   269  				// Check the instance was started with the expected params.
   270  				c.Assert(o.MachineId, gc.Equals, m.Id())
   271  				nonceParts := strings.SplitN(o.MachineNonce, ":", 2)
   272  				c.Assert(nonceParts, gc.HasLen, 2)
   273  				c.Assert(nonceParts[0], gc.Equals, names.NewMachineTag("0").String())
   274  				c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString)
   275  				c.Assert(o.Secret, gc.Equals, secret)
   276  				c.Assert(o.SubnetsToZones, jc.DeepEquals, subnetsToZones)
   277  				c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo)
   278  				c.Assert(o.Volumes, jc.DeepEquals, volumes)
   279  				c.Assert(o.VolumeAttachments, jc.DeepEquals, volumeAttachments)
   280  
   281  				var jobs []multiwatcher.MachineJob
   282  				for _, job := range m.Jobs() {
   283  					jobs = append(jobs, job.ToParams())
   284  				}
   285  				c.Assert(o.Jobs, jc.SameContents, jobs)
   286  
   287  				if checkPossibleTools != nil {
   288  					for _, t := range o.PossibleTools {
   289  						url := fmt.Sprintf("https://%s/model/%s/tools/%s",
   290  							s.st.Addr(), coretesting.ModelTag.Id(), t.Version)
   291  						c.Check(t.URL, gc.Equals, url)
   292  						t.URL = ""
   293  					}
   294  					for _, t := range checkPossibleTools {
   295  						t.URL = ""
   296  					}
   297  					c.Assert(o.PossibleTools, gc.DeepEquals, checkPossibleTools)
   298  				}
   299  
   300  				// All provisioned machines in this test suite have
   301  				// their hardware characteristics attributes set to
   302  				// the same values as the constraints due to the dummy
   303  				// environment being used.
   304  				if !constraints.IsEmpty(&cons) {
   305  					c.Assert(o.Constraints, gc.DeepEquals, cons)
   306  					hc, err := m.HardwareCharacteristics()
   307  					c.Assert(err, jc.ErrorIsNil)
   308  					// At this point we don't care what the AvailabilityZone is,
   309  					// it can be a few different valid things.
   310  					zone := hc.AvailabilityZone
   311  					hc.AvailabilityZone = nil
   312  					c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{
   313  						Arch:     cons.Arch,
   314  						Mem:      cons.Mem,
   315  						RootDisk: cons.RootDisk,
   316  						CpuCores: cons.CpuCores,
   317  						CpuPower: cons.CpuPower,
   318  						Tags:     cons.Tags,
   319  					})
   320  					hc.AvailabilityZone = zone
   321  				}
   322  				returnInstances[m.Id()] = inst
   323  				if found == len(machines) {
   324  					return
   325  				}
   326  				break
   327  			default:
   328  				c.Logf("ignoring unexpected operation %#v", o)
   329  			}
   330  		case <-time.After(2 * time.Second):
   331  			c.Fatalf("provisioner did not start an instance")
   332  			return
   333  		}
   334  	}
   335  }
   336  
   337  // checkNoOperations checks that the environ was not operated upon.
   338  func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) {
   339  	s.BackingState.StartSync()
   340  	select {
   341  	case o := <-s.op:
   342  		c.Fatalf("unexpected operation %+v", o)
   343  	case <-time.After(coretesting.ShortWait):
   344  		return
   345  	}
   346  }
   347  
    348  // checkStopInstances checks that the given instances have been stopped.
   349  func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instances.Instance) {
   350  	s.checkStopSomeInstances(c, instances, nil)
   351  }
   352  
   353  // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not.
   354  func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C,
   355  	instancesToStop []instances.Instance, instancesToKeep []instances.Instance) {
   356  
   357  	s.BackingState.StartSync()
   358  	instanceIdsToStop := set.NewStrings()
   359  	for _, instance := range instancesToStop {
   360  		instanceIdsToStop.Add(string(instance.Id()))
   361  	}
   362  	instanceIdsToKeep := set.NewStrings()
   363  	for _, instance := range instancesToKeep {
   364  		instanceIdsToKeep.Add(string(instance.Id()))
   365  	}
    366  	// Continue checking for stop-instance calls until all the instances we
    367  	// are waiting on have actually stopped, or we time out.
   368  	for !instanceIdsToStop.IsEmpty() {
   369  		select {
   370  		case o := <-s.op:
   371  			switch o := o.(type) {
   372  			case dummy.OpStopInstances:
   373  				for _, id := range o.Ids {
   374  					instId := string(id)
   375  					instanceIdsToStop.Remove(instId)
   376  					if instanceIdsToKeep.Contains(instId) {
   377  						c.Errorf("provisioner unexpectedly stopped instance %s", instId)
   378  					}
   379  				}
   380  			default:
   381  				c.Fatalf("unexpected operation %#v", o)
   382  				return
   383  			}
   384  		case <-time.After(2 * time.Second):
   385  			c.Fatalf("provisioner did not stop an instance")
   386  			return
   387  		}
   388  	}
   389  }
   390  
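         // waitForWatcher waits until check returns true following a change on the
         // supplied watcher, periodically re-syncing state, and fails on timeout.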
   391  func (s *CommonProvisionerSuite) waitForWatcher(c *gc.C, w state.NotifyWatcher, name string, check func() bool) {
   392  	// TODO(jam): We need to grow a new method on NotifyWatcherC
   393  	// that calls StartSync while waiting for changes, then
   394  	// waitMachine and waitHardwareCharacteristics can use that
   395  	// instead
   396  	defer workertest.CleanKill(c, w)
   397  	timeout := time.After(coretesting.LongWait)
   398  	resync := time.After(0)
   399  	for {
   400  		select {
   401  		case <-w.Changes():
   402  			if check() {
   403  				return
   404  			}
   405  		case <-resync:
   406  			resync = time.After(coretesting.ShortWait)
   407  			s.BackingState.StartSync()
   408  		case <-timeout:
   409  			c.Fatalf("%v wait timed out", name)
   410  		}
   411  	}
   412  }
   413  
   414  func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) {
   415  	w := m.WatchHardwareCharacteristics()
   416  	name := fmt.Sprintf("hardware characteristics for machine %v", m)
   417  	s.waitForWatcher(c, w, name, check)
   418  }
   419  
   420  // waitForRemovalMark waits for the supplied machine to be marked for removal.
   421  func (s *CommonProvisionerSuite) waitForRemovalMark(c *gc.C, m *state.Machine) {
   422  	w := s.BackingState.WatchMachineRemovals()
   423  	name := fmt.Sprintf("machine %v marked for removal", m)
   424  	s.waitForWatcher(c, w, name, func() bool {
   425  		removals, err := s.BackingState.AllMachineRemovals()
   426  		c.Assert(err, jc.ErrorIsNil)
   427  		for _, removal := range removals {
   428  			if removal == m.Id() {
   429  				return true
   430  			}
   431  		}
   432  		return false
   433  	})
   434  }
   435  
   436  // waitInstanceId waits until the supplied machine has an instance id, then
   437  // asserts it is as expected.
   438  func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) {
   439  	s.waitHardwareCharacteristics(c, m, func() bool {
   440  		if actual, err := m.InstanceId(); err == nil {
   441  			c.Assert(actual, gc.Equals, expect)
   442  			return true
   443  		} else if !errors.IsNotProvisioned(err) {
   444  			// We don't expect any errors.
   445  			panic(err)
   446  		} else {
   447  			c.Logf("got not provisioned error while waiting: %v", err)
   448  		}
   449  		return false
   450  	})
   451  }
   452  
    453  // waitInstanceIdNoAssert waits until the supplied machine has an instance id,
    454  // without asserting what that id is.
   455  func (s *CommonProvisionerSuite) waitInstanceIdNoAssert(c *gc.C, m *state.Machine) {
   456  	s.waitHardwareCharacteristics(c, m, func() bool {
   457  		if _, err := m.InstanceId(); err == nil {
   458  			return true
   459  		} else if !errors.IsNotProvisioned(err) {
   460  			// We don't expect any errors.
   461  			panic(err)
   462  		}
   463  		return false
   464  	})
   465  }
   466  
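         // newEnvironProvisioner starts an environ provisioner using machine 0's
         // agent configuration and the suite's environ.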
   467  func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner {
   468  	machineTag := names.NewMachineTag("0")
   469  	agentConfig := s.AgentConfigForTag(c, machineTag)
   470  	apiState := apiprovisioner.NewState(s.st)
   471  	w, err := provisioner.NewEnvironProvisioner(apiState, agentConfig, s.Environ, &credentialAPIForTest{})
   472  	c.Assert(err, jc.ErrorIsNil)
   473  	return w
   474  }
   475  
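         // addMachine adds a unit-hosting machine with the suite's default constraints.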
   476  func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) {
   477  	return s.addMachineWithConstraints(s.defaultConstraints)
   478  }
   479  
   480  func (s *CommonProvisionerSuite) addMachineWithConstraints(cons constraints.Value) (*state.Machine, error) {
   481  	return s.BackingState.AddOneMachine(state.MachineTemplate{
   482  		Series:      supportedversion.SupportedLTS(),
   483  		Jobs:        []state.MachineJob{state.JobHostUnits},
   484  		Constraints: cons,
   485  	})
   486  }
   487  
   488  func (s *CommonProvisionerSuite) addMachines(number int) ([]*state.Machine, error) {
   489  	templates := make([]state.MachineTemplate, number)
   490  	for i := range templates {
   491  		templates[i] = state.MachineTemplate{
   492  			Series:      supportedversion.SupportedLTS(),
   493  			Jobs:        []state.MachineJob{state.JobHostUnits},
   494  			Constraints: s.defaultConstraints,
   495  		}
   496  	}
   497  	return s.BackingState.AddMachines(templates...)
   498  }
   499  
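         // enableHA enables HA with n controller machines and returns the machines
         // that were added as a result.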
   500  func (s *CommonProvisionerSuite) enableHA(c *gc.C, n int) []*state.Machine {
   501  	changes, err := s.BackingState.EnableHA(n, s.defaultConstraints, supportedversion.SupportedLTS(), nil)
   502  	c.Assert(err, jc.ErrorIsNil)
   503  	added := make([]*state.Machine, len(changes.Added))
   504  	for i, mid := range changes.Added {
   505  		m, err := s.BackingState.Machine(mid)
   506  		c.Assert(err, jc.ErrorIsNil)
   507  		added[i] = m
   508  	}
   509  	return added
   510  }
   511  
   512  func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) {
   513  	p := s.newEnvironProvisioner(c)
   514  	workertest.CleanKill(c, p)
   515  }
   516  
   517  func (s *ProvisionerSuite) TestSimple(c *gc.C) {
   518  	p := s.newEnvironProvisioner(c)
   519  	defer workertest.CleanKill(c, p)
   520  
   521  	// Check that an instance is provisioned when the machine is created...
   522  	m, err := s.addMachine()
   523  	c.Assert(err, jc.ErrorIsNil)
   524  	instance := s.checkStartInstance(c, m)
   525  
   526  	// ...and removed, along with the machine, when the machine is Dead.
   527  	c.Assert(m.EnsureDead(), gc.IsNil)
   528  	s.checkStopInstances(c, instance)
   529  	s.waitForRemovalMark(c, m)
   530  }
   531  
   532  func (s *ProvisionerSuite) TestConstraints(c *gc.C) {
   533  	// Create a machine with non-standard constraints.
   534  	m, err := s.addMachine()
   535  	c.Assert(err, jc.ErrorIsNil)
   536  	cons := constraints.MustParse("mem=8G arch=amd64 cores=2 root-disk=10G")
   537  	err = m.SetConstraints(cons)
   538  	c.Assert(err, jc.ErrorIsNil)
   539  
   540  	// Start a provisioner and check those constraints are used.
   541  	p := s.newEnvironProvisioner(c)
   542  	defer workertest.CleanKill(c, p)
   543  
   544  	s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, nil, nil, nil, true)
   545  }
   546  
   547  func (s *ProvisionerSuite) TestPossibleTools(c *gc.C) {
   548  
   549  	storageDir := c.MkDir()
   550  	s.PatchValue(&tools.DefaultBaseURL, storageDir)
   551  	stor, err := filestorage.NewFileStorageWriter(storageDir)
   552  	c.Assert(err, jc.ErrorIsNil)
   553  	currentVersion := version.MustParseBinary("1.2.3-quantal-amd64")
   554  
   555  	// The current version is determined by the current model's agent
    556  	// version when locating tools to provision an added unit.
   557  	attrs := map[string]interface{}{
   558  		config.AgentVersionKey: currentVersion.Number.String(),
   559  	}
   560  	err = s.Model.UpdateModelConfig(attrs, nil)
   561  	c.Assert(err, jc.ErrorIsNil)
   562  
   563  	s.PatchValue(&arch.HostArch, func() string { return currentVersion.Arch })
   564  	s.PatchValue(&series.MustHostSeries, func() string { return currentVersion.Series })
   565  
   566  	// Upload some plausible matches, and some that should be filtered out.
   567  	compatibleVersion := version.MustParseBinary("1.2.3-quantal-arm64")
   568  	ignoreVersion1 := version.MustParseBinary("1.2.4-quantal-arm64")
   569  	ignoreVersion2 := version.MustParseBinary("1.2.3-precise-arm64")
   570  	availableVersions := []version.Binary{
   571  		currentVersion, compatibleVersion, ignoreVersion1, ignoreVersion2,
   572  	}
   573  	envtesting.AssertUploadFakeToolsVersions(c, stor, s.cfg.AgentStream(), s.cfg.AgentStream(), availableVersions...)
   574  
   575  	// Extract the tools that we expect to actually match.
   576  	expectedList, err := tools.FindTools(s.Environ, -1, -1, []string{s.cfg.AgentStream()}, coretools.Filter{
   577  		Number: currentVersion.Number,
   578  		Series: currentVersion.Series,
   579  	})
   580  	c.Assert(err, jc.ErrorIsNil)
   581  
   582  	// Create the machine and check the tools that get passed into StartInstance.
   583  	machine, err := s.BackingState.AddOneMachine(state.MachineTemplate{
   584  		Series: "quantal",
   585  		Jobs:   []state.MachineJob{state.JobHostUnits},
   586  	})
   587  	c.Assert(err, jc.ErrorIsNil)
   588  
   589  	provisioner := s.newEnvironProvisioner(c)
   590  	defer workertest.CleanKill(c, provisioner)
   591  	s.checkStartInstanceCustom(
   592  		c, machine, "pork", constraints.Value{},
   593  		nil, nil, nil, nil, expectedList, true,
   594  	)
   595  }
   596  
   597  var validCloudInitUserData = `
   598  packages:
   599    - 'python-keystoneclient'
   600    - 'python-glanceclient'
   601  preruncmd:
   602    - mkdir /tmp/preruncmd
   603    - mkdir /tmp/preruncmd2
   604  postruncmd:
   605    - mkdir /tmp/postruncmd
   606    - mkdir /tmp/postruncmd2
   607  package_upgrade: false
   608  `[1:]
   609  
   610  func (s *ProvisionerSuite) TestSetUpToStartMachine(c *gc.C) {
   611  	attrs := map[string]interface{}{
   612  		config.CloudInitUserDataKey: validCloudInitUserData,
   613  	}
    614  	err := s.Model.UpdateModelConfig(attrs, nil)
         	c.Assert(err, jc.ErrorIsNil)
   615  
   616  	task := s.newProvisionerTask(
   617  		c,
   618  		config.HarvestAll,
   619  		s.Environ,
   620  		s.provisioner,
   621  		&mockDistributionGroupFinder{},
   622  		mockToolsFinder{},
   623  	)
   624  	defer workertest.CleanKill(c, task)
   625  
   626  	machine, err := s.addMachine()
   627  	c.Assert(err, jc.ErrorIsNil)
   628  
   629  	result, err := s.provisioner.Machines(machine.MachineTag())
   630  	c.Assert(err, gc.IsNil)
   631  	c.Assert(result, gc.HasLen, 1)
   632  	c.Assert(result[0].Err, gc.IsNil)
   633  	apiMachine := result[0].Machine
   634  
   635  	v, err := apiMachine.ModelAgentVersion()
   636  	c.Assert(err, jc.ErrorIsNil)
   637  
   638  	startInstanceParams, err := provisioner.SetupToStartMachine(task, apiMachine, v)
   639  	c.Assert(err, jc.ErrorIsNil)
   640  	cloudInitUserData := startInstanceParams.InstanceConfig.CloudInitUserData
   641  	c.Assert(cloudInitUserData, gc.DeepEquals, map[string]interface{}{
   642  		"packages":        []interface{}{"python-keystoneclient", "python-glanceclient"},
   643  		"preruncmd":       []interface{}{"mkdir /tmp/preruncmd", "mkdir /tmp/preruncmd2"},
   644  		"postruncmd":      []interface{}{"mkdir /tmp/postruncmd", "mkdir /tmp/postruncmd2"},
   645  		"package_upgrade": false},
   646  	)
   647  }
   648  
   649  func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) {
   650  	p := s.newEnvironProvisioner(c)
   651  	defer workertest.CleanKill(c, p)
   652  
   653  	// Check that an instance is not provisioned when the machine is created...
   654  	m, err := s.BackingState.AddOneMachine(state.MachineTemplate{
   655  		// We need a valid series that has no tools uploaded
   656  		Series:      "raring",
   657  		Jobs:        []state.MachineJob{state.JobHostUnits},
   658  		Constraints: s.defaultConstraints,
   659  	})
   660  	c.Assert(err, jc.ErrorIsNil)
   661  	s.checkNoOperations(c)
   662  
   663  	// Ensure machine error status was set, and the error matches
   664  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m)
   665  	c.Check(agentStatus.Status, gc.Equals, status.Error)
   666  	c.Check(agentStatus.Message, gc.Equals, "no matching agent binaries available")
   667  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
   668  	c.Check(instanceStatus.Message, gc.Equals, "no matching agent binaries available")
   669  
   670  	// Restart the PA to make sure the machine is skipped again.
   671  	workertest.CleanKill(c, p)
   672  	p = s.newEnvironProvisioner(c)
   673  	defer workertest.CleanKill(c, p)
   674  	s.checkNoOperations(c)
   675  }
   676  
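         // waitUntilMachineNotPending polls the machine until both its agent status and
         // its instance status have left the pending/provisioning states, then returns them.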
   677  func (s *ProvisionerSuite) waitUntilMachineNotPending(c *gc.C, m *state.Machine) (status.StatusInfo, status.StatusInfo) {
   678  	t0 := time.Now()
   679  	for time.Since(t0) < 10*coretesting.LongWait {
   680  		agentStatusInfo, err := m.Status()
   681  		c.Assert(err, jc.ErrorIsNil)
   682  		if agentStatusInfo.Status == status.Pending {
   683  			time.Sleep(coretesting.ShortWait)
   684  			continue
   685  		}
   686  		instanceStatusInfo, err := m.InstanceStatus()
   687  		c.Assert(err, jc.ErrorIsNil)
    688  	// Officially InstanceStatus is only supposed to be Provisioning, but
    689  	// all current providers report their unknown state as Pending.
   690  		if instanceStatusInfo.Status == status.Provisioning ||
   691  			instanceStatusInfo.Status == status.Pending {
   692  			time.Sleep(coretesting.ShortWait)
   693  			continue
   694  		}
   695  		return agentStatusInfo, instanceStatusInfo
   696  	}
   697  	c.Fatalf("machine %q stayed in pending", m.Id())
    698  	// Satisfy Go; Fatalf should panic anyway.
   699  	return status.StatusInfo{}, status.StatusInfo{}
   700  }
   701  
   702  func (s *ProvisionerSuite) TestProvisionerFailedStartInstanceWithInjectedCreationError(c *gc.C) {
   703  	// Set the retry delay to 0, and retry count to 2 to keep tests short
   704  	s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second)
   705  	s.PatchValue(provisioner.RetryStrategyCount, 2)
   706  
   707  	// create the error injection channel
   708  	errorInjectionChannel := make(chan error, 3)
   709  
   710  	p := s.newEnvironProvisioner(c)
   711  	defer workertest.CleanKill(c, p)
   712  
   713  	// patch the dummy provider error injection channel
   714  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   715  	defer cleanup()
   716  
   717  	retryableError := providercommon.ZoneIndependentError(
   718  		errors.New("container failed to start and was destroyed"),
   719  	)
   720  	destroyError := providercommon.ZoneIndependentError(
   721  		errors.New("container failed to start and failed to destroy: manual cleanup of containers needed"),
   722  	)
   723  	// send the error message three times, because the provisioner will retry twice as patched above.
   724  	errorInjectionChannel <- retryableError
   725  	errorInjectionChannel <- retryableError
   726  	errorInjectionChannel <- destroyError
   727  
   728  	m, err := s.addMachine()
   729  	c.Assert(err, jc.ErrorIsNil)
   730  	s.checkNoOperations(c)
   731  
   732  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, m)
   733  	// check that the status matches the error message
   734  	c.Check(agentStatus.Status, gc.Equals, status.Error)
   735  	c.Check(agentStatus.Message, gc.Equals, destroyError.Error())
   736  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
   737  	c.Check(instanceStatus.Message, gc.Equals, destroyError.Error())
   738  }
   739  
   740  func (s *ProvisionerSuite) TestProvisionerSucceedStartInstanceWithInjectedRetryableCreationError(c *gc.C) {
   741  	// Set the retry delay to 0, and retry count to 2 to keep tests short
   742  	s.PatchValue(provisioner.RetryStrategyDelay, 0*time.Second)
   743  	s.PatchValue(provisioner.RetryStrategyCount, 2)
   744  
   745  	// create the error injection channel
   746  	errorInjectionChannel := make(chan error, 1)
   747  	c.Assert(errorInjectionChannel, gc.NotNil)
   748  
   749  	p := s.newEnvironProvisioner(c)
   750  	defer workertest.CleanKill(c, p)
   751  
   752  	// patch the dummy provider error injection channel
   753  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   754  	defer cleanup()
   755  
   756  	// send the error message once
   757  	// - instance creation should succeed
   758  	retryableError := errors.New("container failed to start and was destroyed")
   759  	errorInjectionChannel <- retryableError
   760  
   761  	m, err := s.addMachine()
   762  	c.Assert(err, jc.ErrorIsNil)
   763  	s.checkStartInstance(c, m)
   764  }
   765  
   766  func (s *ProvisionerSuite) TestProvisionerStopRetryingIfDying(c *gc.C) {
   767  	// Create the error injection channel and inject
   768  	// a retryable error
   769  	errorInjectionChannel := make(chan error, 1)
   770  
   771  	p := s.newEnvironProvisioner(c)
    772  	// Don't defer the stop. We will manually stop and verify the result.
   773  
   774  	// patch the dummy provider error injection channel
   775  	cleanup := dummy.PatchTransientErrorInjectionChannel(errorInjectionChannel)
   776  	defer cleanup()
   777  
   778  	retryableError := errors.New("container failed to start and was destroyed")
   779  	errorInjectionChannel <- retryableError
   780  
   781  	m, err := s.addMachine()
   782  	c.Assert(err, jc.ErrorIsNil)
   783  
   784  	time.Sleep(coretesting.ShortWait)
   785  
   786  	workertest.CleanKill(c, p)
   787  	statusInfo, err := m.Status()
   788  	c.Assert(err, jc.ErrorIsNil)
   789  	c.Check(statusInfo.Status, gc.Equals, status.Pending)
   790  	statusInfo, err = m.InstanceStatus()
   791  	c.Assert(err, jc.ErrorIsNil)
   792  	if statusInfo.Status != status.Pending && statusInfo.Status != status.Provisioning {
   793  		c.Errorf("statusInfo.Status was %q not one of %q or %q",
   794  			statusInfo.Status, status.Pending, status.Provisioning)
   795  	}
   796  	s.checkNoOperations(c)
   797  }
   798  
   799  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForLXD(c *gc.C) {
   800  	p := s.newEnvironProvisioner(c)
   801  	defer workertest.CleanKill(c, p)
   802  
   803  	// create a machine to host the container.
   804  	m, err := s.addMachine()
   805  	c.Assert(err, jc.ErrorIsNil)
   806  	inst := s.checkStartInstance(c, m)
   807  
   808  	// make a container on the machine we just created
   809  	template := state.MachineTemplate{
   810  		Series: supportedversion.SupportedLTS(),
   811  		Jobs:   []state.MachineJob{state.JobHostUnits},
   812  	}
   813  	container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXD)
   814  	c.Assert(err, jc.ErrorIsNil)
   815  
   816  	// the PA should not attempt to create it
   817  	s.checkNoOperations(c)
   818  
   819  	// cleanup
   820  	c.Assert(container.EnsureDead(), gc.IsNil)
   821  	c.Assert(container.Remove(), gc.IsNil)
   822  	c.Assert(m.EnsureDead(), gc.IsNil)
   823  	s.checkStopInstances(c, inst)
   824  	s.waitForRemovalMark(c, m)
   825  }
   826  
   827  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForKVM(c *gc.C) {
   828  	p := s.newEnvironProvisioner(c)
   829  	defer workertest.CleanKill(c, p)
   830  
   831  	// create a machine to host the container.
   832  	m, err := s.addMachine()
   833  	c.Assert(err, jc.ErrorIsNil)
   834  	inst := s.checkStartInstance(c, m)
   835  
   836  	// make a container on the machine we just created
   837  	template := state.MachineTemplate{
   838  		Series: supportedversion.SupportedLTS(),
   839  		Jobs:   []state.MachineJob{state.JobHostUnits},
   840  	}
   841  	container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.KVM)
   842  	c.Assert(err, jc.ErrorIsNil)
   843  
   844  	// the PA should not attempt to create it
   845  	s.checkNoOperations(c)
   846  
   847  	// cleanup
   848  	c.Assert(container.EnsureDead(), gc.IsNil)
   849  	c.Assert(container.Remove(), gc.IsNil)
   850  	c.Assert(m.EnsureDead(), gc.IsNil)
   851  	s.checkStopInstances(c, inst)
   852  	s.waitForRemovalMark(c, m)
   853  }
   854  
   855  type MachineClassifySuite struct {
   856  }
   857  
   858  var _ = gc.Suite(&MachineClassifySuite{})
   859  
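         // MockMachine is a stub machine whose life, status and errors are fixed by
         // the machine classification test table.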
   860  type MockMachine struct {
   861  	life          params.Life
   862  	status        status.Status
   863  	id            string
   864  	idErr         error
   865  	ensureDeadErr error
   866  	statusErr     error
   867  }
   868  
   869  func (m *MockMachine) Life() params.Life {
   870  	return m.life
   871  }
   872  
   873  func (m *MockMachine) InstanceId() (instance.Id, error) {
   874  	return instance.Id(m.id), m.idErr
   875  }
   876  
   877  func (m *MockMachine) InstanceNames() (instance.Id, string, error) {
   878  	instId, err := m.InstanceId()
   879  	return instId, "", err
   880  }
   881  
   882  func (m *MockMachine) EnsureDead() error {
   883  	return m.ensureDeadErr
   884  }
   885  
   886  func (m *MockMachine) Status() (status.Status, string, error) {
   887  	return m.status, "", m.statusErr
   888  }
   889  
   890  func (m *MockMachine) InstanceStatus() (status.Status, string, error) {
   891  	return m.status, "", m.statusErr
   892  }
   893  
   894  func (m *MockMachine) Id() string {
   895  	return m.id
   896  }
   897  
   898  type machineClassificationTest struct {
   899  	description    string
   900  	life           params.Life
   901  	status         status.Status
   902  	idErr          string
   903  	ensureDeadErr  string
   904  	expectErrCode  string
   905  	expectErrFmt   string
   906  	statusErr      string
   907  	classification provisioner.MachineClassification
   908  }
   909  
   910  var machineClassificationTests = []machineClassificationTest{{
   911  	description:    "Dead machine is dead",
   912  	life:           params.Dead,
   913  	status:         status.Started,
   914  	classification: provisioner.Dead,
   915  }, {
   916  	description:    "Dying machine can carry on dying",
   917  	life:           params.Dying,
   918  	status:         status.Started,
   919  	classification: provisioner.None,
   920  }, {
   921  	description:    "Dying unprovisioned machine is ensured dead",
   922  	life:           params.Dying,
   923  	status:         status.Started,
   924  	classification: provisioner.Dead,
   925  	idErr:          params.CodeNotProvisioned,
   926  }, {
   927  	description:    "Can't load provisioned dying machine",
   928  	life:           params.Dying,
   929  	status:         status.Started,
   930  	classification: provisioner.None,
   931  	idErr:          params.CodeNotFound,
   932  	expectErrCode:  params.CodeNotFound,
   933  	expectErrFmt:   "failed to load dying machine id:%s.*",
   934  }, {
   935  	description:    "Alive machine is not provisioned - pending",
   936  	life:           params.Alive,
   937  	status:         status.Pending,
   938  	classification: provisioner.Pending,
   939  	idErr:          params.CodeNotProvisioned,
   940  	expectErrFmt:   "found machine pending provisioning id:%s.*",
   941  }, {
   942  	description:    "Alive, pending machine not found",
   943  	life:           params.Alive,
   944  	status:         status.Pending,
   945  	classification: provisioner.None,
   946  	idErr:          params.CodeNotFound,
   947  	expectErrCode:  params.CodeNotFound,
   948  	expectErrFmt:   "failed to load machine id:%s.*",
   949  }, {
   950  	description:    "Cannot get unprovisioned machine status",
   951  	life:           params.Alive,
   952  	classification: provisioner.None,
   953  	statusErr:      params.CodeNotFound,
   954  	idErr:          params.CodeNotProvisioned,
   955  }, {
   956  	description:    "Dying machine fails to ensure dead",
   957  	life:           params.Dying,
   958  	status:         status.Started,
   959  	classification: provisioner.None,
   960  	idErr:          params.CodeNotProvisioned,
   961  	expectErrCode:  params.CodeNotFound,
   962  	ensureDeadErr:  params.CodeNotFound,
   963  	expectErrFmt:   "failed to ensure machine dead id:%s.*",
   964  }}
   965  
   966  var machineClassificationTestsRequireMaintenance = machineClassificationTest{
   967  	description:    "Machine needs maintaining",
   968  	life:           params.Alive,
   969  	status:         status.Started,
   970  	classification: provisioner.Maintain,
   971  }
   972  
   973  var machineClassificationTestsNoMaintenance = machineClassificationTest{
   974  	description:    "Machine doesn't need maintaining",
   975  	life:           params.Alive,
   976  	status:         status.Started,
   977  	classification: provisioner.None,
   978  }
   979  
   980  func (s *MachineClassifySuite) TestMachineClassification(c *gc.C) {
   981  	test := func(t machineClassificationTest, id string) {
   982  		// Run a sub-test from the test table
   983  		s2e := func(s string) error {
    984  			// Little helper to turn a non-empty string into a useful error for "ErrorMatches".
   985  			if s != "" {
   986  				return &params.Error{Code: s}
   987  			}
   988  			return nil
   989  		}
   990  
   991  		c.Logf("%s: %s", id, t.description)
   992  		machine := MockMachine{t.life, t.status, id, s2e(t.idErr), s2e(t.ensureDeadErr), s2e(t.statusErr)}
   993  		classification, err := provisioner.ClassifyMachine(&machine)
   994  		if err != nil {
   995  			c.Assert(err, gc.ErrorMatches, fmt.Sprintf(t.expectErrFmt, machine.Id()))
   996  		} else {
   997  			c.Assert(err, gc.Equals, s2e(t.expectErrCode))
   998  		}
   999  		c.Assert(classification, gc.Equals, t.classification)
  1000  	}
  1001  
  1002  	machineIds := []string{"0/kvm/0", "0"}
  1003  	for _, id := range machineIds {
  1004  		tests := machineClassificationTests
  1005  		if id == "0" {
  1006  			tests = append(tests, machineClassificationTestsNoMaintenance)
  1007  		} else {
  1008  			tests = append(tests, machineClassificationTestsRequireMaintenance)
  1009  		}
  1010  		for _, t := range tests {
  1011  			test(t, id)
  1012  		}
  1013  	}
  1014  }
  1015  
  1016  func (s *ProvisionerSuite) TestProvisioningMachinesWithSpacesSuccess(c *gc.C) {
  1017  	p := s.newEnvironProvisioner(c)
  1018  	defer workertest.CleanKill(c, p)
  1019  
  1020  	// Add the spaces used in constraints.
  1021  	_, err := s.State.AddSpace("space1", "", nil, false)
  1022  	c.Assert(err, jc.ErrorIsNil)
  1023  	_, err = s.State.AddSpace("space2", "", nil, false)
  1024  	c.Assert(err, jc.ErrorIsNil)
  1025  
  1026  	// Add 1 subnet into space1, and 2 into space2.
  1027  	// Each subnet is in a matching zone (e.g "subnet-#" in "zone#").
  1028  	testing.AddSubnetsWithTemplate(c, s.State, 3, state.SubnetInfo{
  1029  		CIDR:             "10.10.{{.}}.0/24",
  1030  		ProviderId:       "subnet-{{.}}",
  1031  		AvailabilityZone: "zone{{.}}",
  1032  		SpaceName:        "{{if (eq . 0)}}space1{{else}}space2{{end}}",
  1033  		VLANTag:          42,
  1034  	})
  1035  
  1036  	// Add and provision a machine with spaces specified.
  1037  	cons := constraints.MustParse(
  1038  		s.defaultConstraints.String(), "spaces=space2,^space1",
  1039  	)
  1040  	// The dummy provider simulates 2 subnets per included space.
  1041  	expectedSubnetsToZones := map[network.Id][]string{
  1042  		"subnet-0": {"zone0"},
  1043  		"subnet-1": {"zone1"},
  1044  	}
  1045  	m, err := s.addMachineWithConstraints(cons)
  1046  	c.Assert(err, jc.ErrorIsNil)
  1047  	inst := s.checkStartInstanceCustom(
  1048  		c, m, "pork", cons,
  1049  		nil,
  1050  		expectedSubnetsToZones,
  1051  		nil, nil, nil, true,
  1052  	)
  1053  
  1054  	// Cleanup.
  1055  	c.Assert(m.EnsureDead(), gc.IsNil)
  1056  	s.checkStopInstances(c, inst)
  1057  	s.waitForRemovalMark(c, m)
  1058  }
  1059  
  1060  func (s *ProvisionerSuite) testProvisioningFailsAndSetsErrorStatusForConstraints(
  1061  	c *gc.C,
  1062  	cons constraints.Value,
  1063  	expectedErrorStatus string,
  1064  ) {
  1065  	machine, err := s.addMachineWithConstraints(cons)
  1066  	c.Assert(err, jc.ErrorIsNil)
  1067  
  1068  	// Start the PA.
  1069  	p := s.newEnvironProvisioner(c)
  1070  	defer workertest.CleanKill(c, p)
  1071  
  1072  	// Expect StartInstance to fail.
  1073  	s.checkNoOperations(c)
  1074  
  1075  	// Ensure machine error status was set, and the error matches
  1076  	agentStatus, instanceStatus := s.waitUntilMachineNotPending(c, machine)
  1077  	c.Check(agentStatus.Status, gc.Equals, status.Error)
  1078  	c.Check(agentStatus.Message, gc.Equals, expectedErrorStatus)
  1079  	c.Check(instanceStatus.Status, gc.Equals, status.ProvisioningError)
  1080  	c.Check(instanceStatus.Message, gc.Equals, expectedErrorStatus)
  1081  
  1082  	// Make sure the task didn't stop with an error
  1083  	died := make(chan error)
  1084  	go func() {
  1085  		died <- p.Wait()
  1086  	}()
  1087  	select {
  1088  	case <-time.After(coretesting.ShortWait):
  1089  	case err := <-died:
  1090  		c.Fatalf("provisioner task died unexpectedly with err: %v", err)
  1091  	}
  1092  
  1093  	// Restart the PA to make sure the machine is not retried.
  1094  	workertest.CleanKill(c, p)
  1095  	p = s.newEnvironProvisioner(c)
  1096  	defer workertest.CleanKill(c, p)
  1097  
  1098  	s.checkNoOperations(c)
  1099  }
  1100  
  1101  func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithUnknownSpaces(c *gc.C) {
  1102  	cons := constraints.MustParse(
  1103  		s.defaultConstraints.String(), "spaces=missing,ignored,^ignored-too",
  1104  	)
  1105  	expectedErrorStatus := `cannot match subnets to zones: space "missing" not found`
  1106  	s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus)
  1107  }
  1108  
  1109  func (s *ProvisionerSuite) TestProvisioningMachinesFailsWithEmptySpaces(c *gc.C) {
  1110  	_, err := s.State.AddSpace("empty", "", nil, false)
  1111  	c.Assert(err, jc.ErrorIsNil)
  1112  	cons := constraints.MustParse(
  1113  		s.defaultConstraints.String(), "spaces=empty",
  1114  	)
  1115  	expectedErrorStatus := `cannot match subnets to zones: ` +
  1116  		`cannot use space "empty" as deployment target: no subnets`
  1117  	s.testProvisioningFailsAndSetsErrorStatusForConstraints(c, cons, expectedErrorStatus)
  1118  }
  1119  
  1120  func (s *CommonProvisionerSuite) addMachineWithRequestedVolumes(volumes []state.HostVolumeParams, cons constraints.Value) (*state.Machine, error) {
  1121  	return s.BackingState.AddOneMachine(state.MachineTemplate{
  1122  		Series:      supportedversion.SupportedLTS(),
  1123  		Jobs:        []state.MachineJob{state.JobHostUnits},
  1124  		Constraints: cons,
  1125  		Volumes:     volumes,
  1126  	})
  1127  }
  1128  
  1129  func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedVolumes(c *gc.C) {
  1130  	// Set up a persistent pool.
  1131  	poolManager := poolmanager.New(state.NewStateSettings(s.State), s.Environ)
  1132  	_, err := poolManager.Create("persistent-pool", "static", map[string]interface{}{"persistent": true})
  1133  	c.Assert(err, jc.ErrorIsNil)
  1134  
  1135  	p := s.newEnvironProvisioner(c)
  1136  	defer workertest.CleanKill(c, p)
  1137  
  1138  	// Add a machine with volumes to state.
  1139  	requestedVolumes := []state.HostVolumeParams{{
  1140  		Volume:     state.VolumeParams{Pool: "static", Size: 1024},
  1141  		Attachment: state.VolumeAttachmentParams{},
  1142  	}, {
  1143  		Volume:     state.VolumeParams{Pool: "persistent-pool", Size: 2048},
  1144  		Attachment: state.VolumeAttachmentParams{},
  1145  	}, {
  1146  		Volume:     state.VolumeParams{Pool: "persistent-pool", Size: 4096},
  1147  		Attachment: state.VolumeAttachmentParams{},
  1148  	}}
  1149  	m, err := s.addMachineWithRequestedVolumes(requestedVolumes, s.defaultConstraints)
  1150  	c.Assert(err, jc.ErrorIsNil)
  1151  
  1152  	// Provision volume-2, so that it is attached rather than created.
  1153  	sb, err := state.NewStorageBackend(s.State)
  1154  	c.Assert(err, jc.ErrorIsNil)
  1155  	err = sb.SetVolumeInfo(names.NewVolumeTag("2"), state.VolumeInfo{
  1156  		Pool:     "persistent-pool",
  1157  		VolumeId: "vol-ume",
  1158  		Size:     4096,
  1159  	})
  1160  	c.Assert(err, jc.ErrorIsNil)
  1161  
  1162  	// Provision the machine, checking the volume and volume attachment arguments.
  1163  	expectedVolumes := []storage.Volume{{
  1164  		names.NewVolumeTag("0"),
  1165  		storage.VolumeInfo{
  1166  			Size: 1024,
  1167  		},
  1168  	}, {
  1169  		names.NewVolumeTag("1"),
  1170  		storage.VolumeInfo{
  1171  			Size:       2048,
  1172  			Persistent: true,
  1173  		},
  1174  	}}
  1175  	expectedVolumeAttachments := []storage.VolumeAttachment{{
  1176  		Volume:  names.NewVolumeTag("2"),
  1177  		Machine: m.MachineTag(),
  1178  		VolumeAttachmentInfo: storage.VolumeAttachmentInfo{
  1179  			DeviceName: "sdb",
  1180  		},
  1181  	}}
  1182  	inst := s.checkStartInstanceCustom(
  1183  		c, m, "pork", s.defaultConstraints,
  1184  		nil, nil,
  1185  		expectedVolumes,
  1186  		expectedVolumeAttachments,
  1187  		nil, true,
  1188  	)
  1189  
  1190  	// Cleanup.
  1191  	c.Assert(m.EnsureDead(), gc.IsNil)
  1192  	s.checkStopInstances(c, inst)
  1193  	s.waitForRemovalMark(c, m)
  1194  }
  1195  
  1196  func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) {
  1197  	p := s.newEnvironProvisioner(c)
  1198  	defer workertest.CleanKill(c, p)
  1199  
  1200  	// create a machine
  1201  	m, err := s.addMachine()
  1202  	c.Assert(err, jc.ErrorIsNil)
  1203  	s.checkStartInstance(c, m)
  1204  
  1205  	// restart the PA
  1206  	workertest.CleanKill(c, p)
  1207  	p = s.newEnvironProvisioner(c)
  1208  	defer workertest.CleanKill(c, p)
  1209  
  1210  	// check that there is only one machine provisioned.
  1211  	machines, err := s.State.AllMachines()
  1212  	c.Assert(err, jc.ErrorIsNil)
  1213  	c.Check(len(machines), gc.Equals, 2)
  1214  	c.Check(machines[0].Id(), gc.Equals, "0")
  1215  	c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse)
  1216  
  1217  	// the PA should not create it a second time
  1218  	s.checkNoOperations(c)
  1219  }
  1220  
  1221  func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) {
  1222  	p := s.newEnvironProvisioner(c)
  1223  	defer workertest.CleanKill(c, p)
  1224  
  1225  	// provision a machine
  1226  	m0, err := s.addMachine()
  1227  	c.Assert(err, jc.ErrorIsNil)
  1228  	s.checkStartInstance(c, m0)
  1229  
  1230  	// stop the provisioner and make the machine dying
  1231  	workertest.CleanKill(c, p)
  1232  	err = m0.Destroy()
  1233  	c.Assert(err, jc.ErrorIsNil)
  1234  
  1235  	// add a new, dying, unprovisioned machine
  1236  	m1, err := s.addMachine()
  1237  	c.Assert(err, jc.ErrorIsNil)
  1238  	err = m1.Destroy()
  1239  	c.Assert(err, jc.ErrorIsNil)
  1240  
  1241  	// start the provisioner and wait for it to reap the useless machine
  1242  	p = s.newEnvironProvisioner(c)
  1243  	defer workertest.CleanKill(c, p)
  1244  	s.checkNoOperations(c)
  1245  	s.waitForRemovalMark(c, m1)
  1246  
  1247  	// verify the other one's still fine
  1248  	err = m0.Refresh()
  1249  	c.Assert(err, jc.ErrorIsNil)
  1250  	c.Assert(m0.Life(), gc.Equals, state.Dying)
  1251  }
  1252  
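         // mockMachineGetter returns an error from every call, so tests can check that
         // the provisioner task keeps instances alive when machines cannot be fetched.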
  1253  type mockMachineGetter struct {
  1254  	machines map[names.MachineTag]*apiprovisioner.Machine
  1255  }
  1256  
  1257  func (mock *mockMachineGetter) Machines(tags ...names.MachineTag) ([]apiprovisioner.MachineResult, error) {
  1258  	return nil, fmt.Errorf("error")
  1259  }
  1260  
  1261  func (*mockMachineGetter) MachinesWithTransientErrors() ([]apiprovisioner.MachineStatusResult, error) {
  1262  	return nil, fmt.Errorf("error")
  1263  }
  1264  
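         // mockDistributionGroupFinder returns the configured distribution groups, or
         // empty groups when none have been set.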
  1265  type mockDistributionGroupFinder struct {
  1266  	groups map[names.MachineTag][]string
  1267  }
  1268  
  1269  func (mock *mockDistributionGroupFinder) DistributionGroupByMachineId(tags ...names.MachineTag) ([]apiprovisioner.DistributionGroupResult, error) {
  1270  	result := make([]apiprovisioner.DistributionGroupResult, len(tags))
  1271  	if len(mock.groups) == 0 {
  1272  		for i := range tags {
  1273  			result[i] = apiprovisioner.DistributionGroupResult{[]string{}, nil}
  1274  		}
  1275  	} else {
  1276  		for i, tag := range tags {
  1277  			if dg, ok := mock.groups[tag]; ok {
  1278  				result[i] = apiprovisioner.DistributionGroupResult{dg, nil}
  1279  			} else {
  1280  				result[i] = apiprovisioner.DistributionGroupResult{[]string{}, &params.Error{Code: params.CodeNotFound, Message: "Fail"}}
  1281  			}
  1282  		}
  1283  	}
  1284  	return result, nil
  1285  }
  1286  
  1287  func (s *ProvisionerSuite) TestMachineErrorsRetainInstances(c *gc.C) {
  1288  	task := s.newProvisionerTask(
  1289  		c,
  1290  		config.HarvestAll,
  1291  		s.Environ,
  1292  		s.provisioner,
  1293  		&mockDistributionGroupFinder{},
  1294  		mockToolsFinder{},
  1295  	)
  1296  	defer workertest.CleanKill(c, task)
  1297  
  1298  	// create a machine
  1299  	m0, err := s.addMachine()
  1300  	c.Assert(err, jc.ErrorIsNil)
  1301  	s.checkStartInstance(c, m0)
  1302  
  1303  	// create an instance out of band
  1304  	s.startUnknownInstance(c, "999")
  1305  
   1306  	// start the provisioner and ensure it doesn't kill any instances if there are errors getting machines
  1307  	task = s.newProvisionerTask(
  1308  		c,
  1309  		config.HarvestAll,
  1310  		s.Environ,
  1311  		&mockMachineGetter{},
  1312  		&mockDistributionGroupFinder{},
  1313  		&mockToolsFinder{},
  1314  	)
  1315  	defer func() {
  1316  		err := worker.Stop(task)
  1317  		c.Assert(err, gc.ErrorMatches, ".*failed to get machine.*")
  1318  	}()
  1319  	s.checkNoOperations(c)
  1320  }
  1321  
  1322  func (s *ProvisionerSuite) TestEnvironProvisionerObservesConfigChanges(c *gc.C) {
  1323  	p := s.newEnvironProvisioner(c)
  1324  	defer workertest.CleanKill(c, p)
  1325  	s.assertProvisionerObservesConfigChanges(c, p)
  1326  }
  1327  
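         // newProvisionerTask creates a provisioner task with a zero-delay, zero-retry
         // strategy for the given harvest mode, broker and finders.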
  1328  func (s *ProvisionerSuite) newProvisionerTask(
  1329  	c *gc.C,
  1330  	harvestingMethod config.HarvestMode,
  1331  	broker environs.InstanceBroker,
  1332  	machineGetter provisioner.MachineGetter,
  1333  	distributionGroupFinder provisioner.DistributionGroupFinder,
  1334  	toolsFinder provisioner.ToolsFinder,
  1335  ) provisioner.ProvisionerTask {
  1336  
  1337  	retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 0)
  1338  
  1339  	return s.newProvisionerTaskWithRetryStrategy(c, harvestingMethod, broker,
  1340  		machineGetter, distributionGroupFinder, toolsFinder, retryStrategy)
  1341  }
  1342  
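         // newProvisionerTaskWithRetryStrategy wires up the machine, retry and charm
         // profile watchers plus an API authenticator, and starts a provisioner task.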
  1343  func (s *ProvisionerSuite) newProvisionerTaskWithRetryStrategy(
  1344  	c *gc.C,
  1345  	harvestingMethod config.HarvestMode,
  1346  	broker environs.InstanceBroker,
  1347  	machineGetter provisioner.MachineGetter,
  1348  	distributionGroupFinder provisioner.DistributionGroupFinder,
  1349  	toolsFinder provisioner.ToolsFinder,
  1350  	retryStrategy provisioner.RetryStrategy,
  1351  ) provisioner.ProvisionerTask {
  1352  
  1353  	machineWatcher, err := s.provisioner.WatchModelMachines()
  1354  	c.Assert(err, jc.ErrorIsNil)
  1355  	retryWatcher, err := s.provisioner.WatchMachineErrorRetry()
  1356  	c.Assert(err, jc.ErrorIsNil)
  1357  	machineProfileWatcher, err := s.provisioner.WatchModelMachinesCharmProfiles()
  1358  	c.Assert(err, jc.ErrorIsNil)
  1359  	auth, err := authentication.NewAPIAuthenticator(s.provisioner)
  1360  	c.Assert(err, jc.ErrorIsNil)
  1361  
  1362  	w, err := provisioner.NewProvisionerTask(
  1363  		s.ControllerConfig.ControllerUUID(),
  1364  		names.NewMachineTag("0"),
  1365  		harvestingMethod,
  1366  		machineGetter,
  1367  		distributionGroupFinder,
  1368  		toolsFinder,
  1369  		machineWatcher,
  1370  		retryWatcher,
  1371  		machineProfileWatcher,
  1372  		broker,
  1373  		auth,
  1374  		imagemetadata.ReleasedStream,
  1375  		retryStrategy,
  1376  		s.callCtx,
  1377  	)
  1378  	c.Assert(err, jc.ErrorIsNil)
  1379  	return w
  1380  }
  1381  
  1382  func (s *ProvisionerSuite) TestHarvestNoneReapsNothing(c *gc.C) {
  1383  
  1384  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1385  	defer workertest.CleanKill(c, task)
  1386  	task.SetHarvestMode(config.HarvestNone)
  1387  
  1388  	// Create a machine and an unknown instance.
  1389  	m0, err := s.addMachine()
  1390  	c.Assert(err, jc.ErrorIsNil)
  1391  	s.checkStartInstance(c, m0)
  1392  	s.startUnknownInstance(c, "999")
  1393  
  1394  	// Mark the first machine as dead.
  1395  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1396  
  1397  	// Ensure we're doing nothing.
  1398  	s.checkNoOperations(c)
  1399  }
  1400  
  1401  func (s *ProvisionerSuite) TestHarvestUnknownReapsOnlyUnknown(c *gc.C) {
  1402  
  1403  	task := s.newProvisionerTask(c,
  1404  		config.HarvestDestroyed,
  1405  		s.Environ,
  1406  		s.provisioner,
  1407  		&mockDistributionGroupFinder{},
  1408  		mockToolsFinder{},
  1409  	)
  1410  	defer workertest.CleanKill(c, task)
  1411  	task.SetHarvestMode(config.HarvestUnknown)
  1412  
  1413  	// Create a machine and an unknown instance.
  1414  	m0, err := s.addMachine()
  1415  	c.Assert(err, jc.ErrorIsNil)
  1416  	i0 := s.checkStartInstance(c, m0)
  1417  	i1 := s.startUnknownInstance(c, "999")
  1418  
  1419  	// Mark the first machine as dead.
  1420  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1421  
  1422  	// When only harvesting unknown machines, only one of the machines
  1423  	// is stopped.
  1424  	s.checkStopSomeInstances(c, []instances.Instance{i1}, []instances.Instance{i0})
  1425  	s.waitForRemovalMark(c, m0)
  1426  }
  1427  
  1428  func (s *ProvisionerSuite) TestHarvestDestroyedReapsOnlyDestroyed(c *gc.C) {
  1429  
  1430  	task := s.newProvisionerTask(
  1431  		c,
  1432  		config.HarvestDestroyed,
  1433  		s.Environ,
  1434  		s.provisioner,
  1435  		&mockDistributionGroupFinder{},
  1436  		mockToolsFinder{},
  1437  	)
  1438  	defer workertest.CleanKill(c, task)
  1439  
  1440  	// Create a machine and an unknown instance.
  1441  	m0, err := s.addMachine()
  1442  	c.Assert(err, jc.ErrorIsNil)
  1443  	i0 := s.checkStartInstance(c, m0)
  1444  	i1 := s.startUnknownInstance(c, "999")
  1445  
  1446  	// Mark the first machine as dead.
  1447  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1448  
  1449  	// When only harvesting destroyed machines, only one of the
  1450  	// machines is stopped.
  1451  	s.checkStopSomeInstances(c, []instances.Instance{i0}, []instances.Instance{i1})
  1452  	s.waitForRemovalMark(c, m0)
  1453  }
  1454  
  1455  func (s *ProvisionerSuite) TestHarvestAllReapsAllTheThings(c *gc.C) {
  1456  
  1457  	task := s.newProvisionerTask(c,
  1458  		config.HarvestDestroyed,
  1459  		s.Environ,
  1460  		s.provisioner,
  1461  		&mockDistributionGroupFinder{},
  1462  		mockToolsFinder{},
  1463  	)
  1464  	defer workertest.CleanKill(c, task)
  1465  	task.SetHarvestMode(config.HarvestAll)
  1466  
  1467  	// Create a machine and an unknown instance.
  1468  	m0, err := s.addMachine()
  1469  	c.Assert(err, jc.ErrorIsNil)
  1470  	i0 := s.checkStartInstance(c, m0)
  1471  	i1 := s.startUnknownInstance(c, "999")
  1472  
  1473  	// Mark the first machine as dead.
  1474  	c.Assert(m0.EnsureDead(), gc.IsNil)
  1475  
  1476  	// Everything must die!
  1477  	s.checkStopSomeInstances(c, []instances.Instance{i0, i1}, []instances.Instance{})
  1478  	s.waitForRemovalMark(c, m0)
  1479  }
  1480  
  1481  func (s *ProvisionerSuite) TestProvisionerObservesMachineJobs(c *gc.C) {
  1482  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1483  	broker := &mockBroker{Environ: s.Environ, retryCount: make(map[string]int),
  1484  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1485  			"3": {whenSucceed: 2, err: fmt.Errorf("error: some error")},
  1486  			"4": {whenSucceed: 2, err: fmt.Errorf("error: some error")},
  1487  		},
  1488  	}
  1489  	task := s.newProvisionerTask(c, config.HarvestAll, broker, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1490  	defer workertest.CleanKill(c, task)
  1491  
  1492  	added := s.enableHA(c, 3)
  1493  	c.Assert(added, gc.HasLen, 2)
  1494  	s.checkStartInstances(c, added)
  1495  }
  1496  
  1497  func assertAvailabilityZoneMachines(c *gc.C,
  1498  	machines []*state.Machine,
  1499  	failedAZMachines []*state.Machine,
  1500  	obtained []provisioner.AvailabilityZoneMachine,
  1501  ) {
  1502  	if len(machines) > 0 {
  1503  		// Does each machine's zone match exactly one AvailabilityZoneMachine entry?
  1504  		for _, m := range machines {
  1505  			zone, err := m.AvailabilityZone()
  1506  			c.Assert(err, jc.ErrorIsNil)
  1507  			found := 0
  1508  			for _, zoneInfo := range obtained {
  1509  				if zone == zoneInfo.ZoneName {
  1510  					c.Assert(zoneInfo.MachineIds.Contains(m.Id()), gc.Equals, true)
  1511  					found += 1
  1512  				}
  1513  			}
  1514  			c.Assert(found, gc.Equals, 1)
  1515  		}
  1516  	}
  1517  	if len(failedAZMachines) > 0 {
  1518  		for _, m := range failedAZMachines {
  1519  			// Is the failed machine listed as failed in at least one zone?
  1520  			failedZones := 0
  1521  			for _, zoneInfo := range obtained {
  1522  				if zoneInfo.FailedMachineIds.Contains(m.Id()) {
  1523  					failedZones += 1
  1524  				}
  1525  			}
  1526  			c.Assert(failedZones, jc.GreaterThan, 0)
  1527  		}
  1528  	}
  1529  }
  1530  
  1531  // assertAvailabilityZoneMachinesDistribution checks that the machines
  1532  // have been distributed evenly over the zones.  This check is only
  1533  // valid for test cases in which no machine provisioning errors occur.
  1534  //
  1535  // Which machine ends up in which zone depends on the order in which
  1536  // the machines are provisioned, so it is almost impossible to predict.
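//
// For example, provisioning 4 machines across 3 zones should give zone
// counts such as {2, 1, 1}, where max-min == 1 and the assertion below
// holds; a skewed placement like {3, 1, 0} would give max-min == 3 and fail.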
  1537  func assertAvailabilityZoneMachinesDistribution(c *gc.C, obtained []provisioner.AvailabilityZoneMachine) {
  1538  	// Are the machines evenly distributed?  The machine counts of any
  1539  	// two zones should differ by at most one.
  1540  	min, max := -1, 0
  1541  	for _, zone := range obtained {
  1542  		count := zone.MachineIds.Size()
  1543  		if min == -1 || min > count {
  1544  			min = count
  1545  		}
  1546  		if max < count {
  1547  			max = count
  1548  		}
  1549  	}
  1550  	c.Assert(max-min, jc.LessThan, 2)
  1551  }
  1552  
  1553  // checkAvailabilityZoneMachinesDistributionGroups checks that
  1554  // the distribution groups have been honored.
  1555  func checkAvailabilityZoneMachinesDistributionGroups(c *gc.C, groups map[names.MachineTag][]string, obtained []provisioner.AvailabilityZoneMachine) error {
  1556  	// The machines in a distribution group, together with the machine
  1557  	// the group belongs to, should not share an AZ unless there are
  1558  	// more machines in the set than there are AZs.
  1559  	// If there are more machines in the set than AZs, each AZ should
  1560  	// hold the number of machines in the set divided by the number of
  1561  	// AZs (rounded up), or one fewer than that.
  1562  	//
  1563  	// e.g. with 5 machines in the set and 3 AZs, each AZ should hold
  1564  	// 2 or 1 machines from the set.
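	// In terms of the code below, that example gives a maxMachineInZoneCount
	// of 2 (the ceiling of 5/3), so the zone holding the machine must
	// contain 2 or 1 members of the set, counting the machine itself.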
  1565  	obtainedZoneCount := len(obtained)
  1566  	for tag, group := range groups {
  1567  		maxMachineInZoneCount := 1
  1568  		applicationMachinesCount := len(group) + 1
  1569  		if applicationMachinesCount > obtainedZoneCount {
  1570  			maxMachineInZoneCount = (applicationMachinesCount + obtainedZoneCount - 1) / obtainedZoneCount // round up
  1571  		}
  1572  		for _, z := range obtained {
  1573  			if z.MachineIds.Contains(tag.Id()) {
  1574  				intersection := z.MachineIds.Intersection(set.NewStrings(group...))
  1575  				machineCount := intersection.Size() + 1
  1576  				// For appropriate machine distribution, the number of machines in the
  1577  				// zone should be the same as maxMachineInZoneCount or 1 less.
  1578  				if machineCount == maxMachineInZoneCount || machineCount == maxMachineInZoneCount-1 {
  1579  					break
  1580  				}
  1581  				return errors.Errorf("%+v has too many of %s and %s", z.MachineIds, tag.Id(), group)
  1582  			}
  1583  		}
  1584  	}
  1585  	return nil
  1586  }
  1587  
  1588  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachines(c *gc.C) {
  1589  	// The dummy provider exposes 3 availability zones.
  1590  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1591  	defer workertest.CleanKill(c, task)
  1592  
  1593  	machines, err := s.addMachines(4)
  1594  	c.Assert(err, jc.ErrorIsNil)
  1595  	s.checkStartInstances(c, machines)
  1596  
  1597  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1598  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1599  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines)
  1600  }
  1601  
  1602  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailures(c *gc.C) {
  1603  	// The dummy provider exposes 3 availability zones.
  1604  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1605  	e := &mockBroker{
  1606  		Environ:    s.Environ,
  1607  		retryCount: make(map[string]int),
  1608  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1609  			"2": {whenSucceed: 1, err: errors.New("zing")},
  1610  		},
  1611  	}
  1612  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2)
  1613  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1614  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1615  	defer workertest.CleanKill(c, task)
  1616  
  1617  	machines, err := s.addMachines(4)
  1618  	c.Assert(err, jc.ErrorIsNil)
  1619  	s.checkStartInstances(c, machines)
  1620  
  1621  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1622  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1623  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines)
  1624  }
  1625  
  1626  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesWithDG(c *gc.C) {
  1627  	// The dummy provider exposes 3 availability zones.
  1628  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1629  	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
  1630  		names.NewMachineTag("1"): {"3, 4"},
  1631  		names.NewMachineTag("2"): {},
  1632  		names.NewMachineTag("3"): {"1, 4"},
  1633  		names.NewMachineTag("4"): {"1, 3"},
  1634  		names.NewMachineTag("5"): {},
  1635  	}}
  1636  
  1637  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
  1638  	defer workertest.CleanKill(c, task)
  1639  
  1640  	machines, err := s.addMachines(5)
  1641  	c.Assert(err, jc.ErrorIsNil)
  1642  	s.checkStartInstances(c, machines)
  1643  
  1644  	// Machines 1, 3 and 4 should end up in different zones.
  1645  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1646  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1647  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1648  }
  1649  
  1650  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStartMachinesAZFailuresWithDG(c *gc.C) {
  1651  	// The dummy provider exposes 3 availability zones.
  1652  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1653  	e := &mockBroker{
  1654  		Environ:    s.Environ,
  1655  		retryCount: make(map[string]int),
  1656  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1657  			"2": {whenSucceed: 1, err: errors.New("zing")},
  1658  		},
  1659  	}
  1660  	dgFinder := &mockDistributionGroupFinder{groups: map[names.MachineTag][]string{
  1661  		names.NewMachineTag("1"): {"4", "5"},
  1662  		names.NewMachineTag("2"): {"3"},
  1663  		names.NewMachineTag("3"): {"2"},
  1664  		names.NewMachineTag("4"): {"1", "5"},
  1665  		names.NewMachineTag("5"): {"1", "4"},
  1666  	}}
  1667  	retryStrategy := provisioner.NewRetryStrategy(0*time.Second, 2)
  1668  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1669  		e, s.provisioner, dgFinder, mockToolsFinder{}, retryStrategy)
  1670  	defer workertest.CleanKill(c, task)
  1671  
  1672  	machines, err := s.addMachines(5)
  1673  	c.Assert(err, jc.ErrorIsNil)
  1674  	s.checkStartInstances(c, machines)
  1675  
  1676  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1677  	assertAvailabilityZoneMachines(c, machines, []*state.Machine{machines[1]}, availabilityZoneMachines)
  1678  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1679  }
  1680  
  1681  func (s *ProvisionerSuite) TestProvisioningMachinesSingleMachineDGFailure(c *gc.C) {
  1682  	// If a single machine fails getting the distribution group,
  1683  	// ensure the other machines are still provisioned.
  1684  	dgFinder := &mockDistributionGroupFinder{
  1685  		groups: map[names.MachineTag][]string{
  1686  			names.NewMachineTag("2"): {"3", "5"},
  1687  			names.NewMachineTag("3"): {"2", "5"},
  1688  			names.NewMachineTag("4"): {"1"},
  1689  			names.NewMachineTag("5"): {"2", "3"},
  1690  		},
  1691  	}
  1692  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, dgFinder, mockToolsFinder{})
  1693  	defer workertest.CleanKill(c, task)
  1694  
  1695  	machines, err := s.addMachines(5)
  1696  	c.Assert(err, jc.ErrorIsNil)
  1697  
  1698  	s.checkStartInstances(c, machines[1:])
  1699  	_, err = machines[0].InstanceId()
  1700  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1701  
  1702  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1703  	assertAvailabilityZoneMachines(c, machines[1:], nil, availabilityZoneMachines)
  1704  	c.Assert(checkAvailabilityZoneMachinesDistributionGroups(c, dgFinder.groups, availabilityZoneMachines), jc.ErrorIsNil)
  1705  }
  1706  
  1707  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesStopMachines(c *gc.C) {
  1708  	// The dummy provider exposes 3 availability zones.
  1709  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1710  	defer workertest.CleanKill(c, task)
  1711  
  1712  	machines, err := s.addMachines(4)
  1713  	c.Assert(err, jc.ErrorIsNil)
  1714  	s.checkStartInstances(c, machines)
  1715  
  1716  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1717  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1718  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines)
  1719  
  1720  	c.Assert(machines[0].EnsureDead(), gc.IsNil)
  1721  	s.waitForRemovalMark(c, machines[0])
  1722  
  1723  	assertAvailabilityZoneMachines(c, machines[1:], nil, provisioner.GetCopyAvailabilityZoneMachines(task))
  1724  }
  1725  
  1726  func (s *ProvisionerSuite) TestProvisioningMachinesFailMachine(c *gc.C) {
  1727  	e := &mockBroker{
  1728  		Environ:    s.Environ,
  1729  		retryCount: make(map[string]int),
  1730  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1731  			"2": {whenSucceed: 2, err: errors.New("fail provisioning for TestAvailabilityZoneMachinesFailMachine")},
  1732  		},
  1733  	}
  1734  	task := s.newProvisionerTask(c, config.HarvestDestroyed,
  1735  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1736  	defer workertest.CleanKill(c, task)
  1737  
  1738  	machines, err := s.addMachines(4)
  1739  	c.Assert(err, jc.ErrorIsNil)
  1740  	mFail := machines[1]
  1741  	machines = append(machines[:1], machines[2:]...)
  1742  	s.checkStartInstances(c, machines)
  1743  	_, err = mFail.InstanceId()
  1744  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1745  
  1746  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1747  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachines)
  1748  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachines)
  1749  }
  1750  
  1751  func (s *ProvisionerSuite) TestAvailabilityZoneMachinesRestartTask(c *gc.C) {
  1752  	// The dummy provider exposes 3 availability zones.
  1753  	task := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1754  	defer workertest.CleanKill(c, task)
  1755  
  1756  	machines, err := s.addMachines(4)
  1757  	c.Assert(err, jc.ErrorIsNil)
  1758  	s.checkStartInstances(c, machines)
  1759  
  1760  	availabilityZoneMachinesBefore := provisioner.GetCopyAvailabilityZoneMachines(task)
  1761  	assertAvailabilityZoneMachines(c, machines, nil, availabilityZoneMachinesBefore)
  1762  	assertAvailabilityZoneMachinesDistribution(c, availabilityZoneMachinesBefore)
  1763  
  1764  	workertest.CleanKill(c, task)
  1765  	newTask := s.newProvisionerTask(c, config.HarvestDestroyed, s.Environ, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{})
  1766  	defer workertest.CleanKill(c, newTask)
  1767  
  1768  	// Verify provisionerTask.availabilityZoneMachines is the same before and
  1769  	// after the provisionerTask is restarted.
  1770  	availabilityZoneMachinesAfter := provisioner.GetCopyAvailabilityZoneMachines(newTask)
  1771  	c.Assert(availabilityZoneMachinesBefore, jc.DeepEquals, availabilityZoneMachinesAfter)
  1772  }
  1773  
  1774  func (s *ProvisionerSuite) TestProvisioningMachinesClearAZFailures(c *gc.C) {
  1775  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1776  	e := &mockBroker{
  1777  		Environ:    s.Environ,
  1778  		retryCount: make(map[string]int),
  1779  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1780  			"1": {whenSucceed: 3, err: errors.New("zing")},
  1781  		},
  1782  	}
  1783  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 4)
  1784  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1785  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1786  	defer workertest.CleanKill(c, task)
  1787  
  1788  	machine, err := s.addMachine()
  1789  	c.Assert(err, jc.ErrorIsNil)
  1790  	s.checkStartInstance(c, machine)
  1791  	count := e.getRetryCount(machine.Id())
  1792  	c.Assert(count, gc.Equals, 3)
  1793  	machineAZ, err := machine.AvailabilityZone()
  1794  	c.Assert(err, jc.ErrorIsNil)
  1795  	c.Assert(machineAZ, gc.Equals, "zone1")
  1796  }
  1797  
  1798  func (s *ProvisionerSuite) TestProvisioningMachinesDerivedAZ(c *gc.C) {
  1799  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
  1800  	e := &mockBroker{
  1801  		Environ:    s.Environ,
  1802  		retryCount: make(map[string]int),
  1803  		startInstanceFailureInfo: map[string]mockBrokerFailures{
  1804  			"2": {whenSucceed: 3, err: errors.New("zing")},
  1805  			"3": {whenSucceed: 1, err: errors.New("zing")},
  1806  			"5": {whenSucceed: 1, err: providercommon.ZoneIndependentError(errors.New("arf"))},
  1807  		},
  1808  		derivedAZ: map[string][]string{
  1809  			"1": {"fail-zone"},
  1810  			"2": {"zone4"},
  1811  			"3": {"zone1", "zone4"},
  1812  			"4": {"zone1"},
  1813  			"5": {"zone3"},
  1814  		},
  1815  	}
  1816  	retryStrategy := provisioner.NewRetryStrategy(5*time.Millisecond, 2)
  1817  	task := s.newProvisionerTaskWithRetryStrategy(c, config.HarvestDestroyed,
  1818  		e, s.provisioner, &mockDistributionGroupFinder{}, mockToolsFinder{}, retryStrategy)
  1819  	defer workertest.CleanKill(c, task)
  1820  
  1821  	machines, err := s.addMachines(5)
  1822  	c.Assert(err, jc.ErrorIsNil)
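	// Machines "1" and "2" are expected to fail to provision: "2" is set to
	// fail more times than the retry strategy allows, and "1" derives only
	// "fail-zone", which is not one of the dummy provider's zones.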
  1823  	mFail := machines[:2]
  1824  	mSucceed := machines[2:]
  1825  
  1826  	s.checkStartInstances(c, mSucceed)
  1827  	c.Assert(e.getRetryCount(mSucceed[0].Id()), gc.Equals, 1)
  1828  	c.Assert(e.getRetryCount(mSucceed[2].Id()), gc.Equals, 1)
  1829  
  1830  	// This synchronisation addresses a potential race condition.
  1831  	// It can happen that, upon successful return from checkStartInstances,
  1832  	// the machine(s) arranged to fail provisioning have not yet been
  1833  	// retried the specified number of times, so we wait.
  1834  	id := mFail[1].Id()
  1835  	timeout := time.After(coretesting.LongWait)
  1836  	for e.getRetryCount(id) < 3 {
  1837  		select {
  1838  		case <-timeout:
  1839  			c.Fatalf("Failed provision of %q did not retry 3 times", id)
  1840  		default:
  1841  		}
  1842  	}
  1843  
  1844  	_, err = mFail[0].InstanceId()
  1845  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1846  	_, err = mFail[1].InstanceId()
  1847  	c.Assert(err, jc.Satisfies, errors.IsNotProvisioned)
  1848  
  1849  	availabilityZoneMachines := provisioner.GetCopyAvailabilityZoneMachines(task)
  1850  	assertAvailabilityZoneMachines(c, mSucceed, nil, availabilityZoneMachines)
  1851  
  1852  	for i, zone := range []string{"zone1", "zone3"} {
  1853  		machineAZ, err := mSucceed[i+1].AvailabilityZone()
  1854  		c.Assert(err, jc.ErrorIsNil)
  1855  		c.Assert(machineAZ, gc.Equals, zone)
  1856  	}
  1857  }
  1858  
  1859  func (s *ProvisionerSuite) TestProvisioningMachinesNoZonedEnviron(c *gc.C) {
  1860  	// Make sure the provisioner still works for providers which do not
  1861  	// implement the ZonedEnviron interface.
  1862  	noZonedEnvironBroker := &mockNoZonedEnvironBroker{Environ: s.Environ}
  1863  	task := s.newProvisionerTask(c,
  1864  		config.HarvestDestroyed,
  1865  		noZonedEnvironBroker,
  1866  		s.provisioner,
  1867  		&mockDistributionGroupFinder{},
  1868  		mockToolsFinder{})
  1869  	defer workertest.CleanKill(c, task)
  1870  
  1871  	machines, err := s.addMachines(4)
  1872  	c.Assert(err, jc.ErrorIsNil)
  1873  	s.checkStartInstances(c, machines)
  1874  
  1875  	expected := provisioner.GetCopyAvailabilityZoneMachines(task)
  1876  	c.Assert(expected, gc.HasLen, 0)
  1877  }
  1878  
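// mockNoZonedEnvironBroker wraps an environs.Environ without promoting the
// ZonedEnviron methods, so the provisioner sees a provider with no
// availability zone support.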
  1879  type mockNoZonedEnvironBroker struct {
  1880  	environs.Environ
  1881  }
  1882  
  1883  func (b *mockNoZonedEnvironBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
  1884  	return b.Environ.StartInstance(ctx, args)
  1885  }
  1886  
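// mockBroker wraps the dummy Environ so that tests can make StartInstance
// fail a configurable number of times per machine id, and can override the
// availability zones derived for a machine.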
  1887  type mockBroker struct {
  1888  	environs.Environ
  1889  
  1890  	mu                       sync.Mutex
  1891  	retryCount               map[string]int
  1892  	startInstanceFailureInfo map[string]mockBrokerFailures
  1893  	derivedAZ                map[string][]string
  1894  }
  1895  
  1896  type mockBrokerFailures struct {
  1897  	err         error
  1898  	whenSucceed int
  1899  }
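// For example, configuring
//
//	startInstanceFailureInfo: map[string]mockBrokerFailures{
//		"2": {whenSucceed: 1, err: errors.New("zing")},
//	}
//
// makes StartInstance fail once for machine "2" and succeed on the second
// attempt, as several of the tests above do.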
  1900  
  1901  func (b *mockBroker) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (*environs.StartInstanceResult, error) {
  1902  	// All machines are provisioned successfully the first time unless
  1903  	// startInstanceFailureInfo is configured for the machine id, in which
  1904  	// case StartInstance returns the configured error until it has failed whenSucceed times.
  1905  	id := args.InstanceConfig.MachineId
  1906  	b.mu.Lock()
  1907  	defer b.mu.Unlock()
  1908  	retries := b.retryCount[id]
  1909  	whenSucceed := 0
  1910  	var returnError error
  1911  	if failureInfo, ok := b.startInstanceFailureInfo[id]; ok {
  1912  		whenSucceed = failureInfo.whenSucceed
  1913  		returnError = failureInfo.err
  1914  	}
  1915  	if retries == whenSucceed {
  1916  		return b.Environ.StartInstance(ctx, args)
  1917  	} else {
  1918  		b.retryCount[id] = retries + 1
  1919  	}
  1920  	return nil, returnError
  1921  }
  1922  
  1923  func (b *mockBroker) getRetryCount(id string) int {
  1924  	b.mu.Lock()
  1925  	retries := b.retryCount[id]
  1926  	b.mu.Unlock()
  1927  	return retries
  1928  }
  1929  
  1930  // The ZonedEnviron methods below are necessary for
  1931  // provisionerTask.populateAvailabilityZoneMachines when mockBroker is used.
  1932  
  1933  func (b *mockBroker) AvailabilityZones(ctx context.ProviderCallContext) ([]providercommon.AvailabilityZone, error) {
  1934  	return b.Environ.(providercommon.ZonedEnviron).AvailabilityZones(ctx)
  1935  }
  1936  
  1937  func (b *mockBroker) InstanceAvailabilityZoneNames(ctx context.ProviderCallContext, ids []instance.Id) ([]string, error) {
  1938  	return b.Environ.(providercommon.ZonedEnviron).InstanceAvailabilityZoneNames(ctx, ids)
  1939  }
  1940  
  1941  func (b *mockBroker) DeriveAvailabilityZones(ctx context.ProviderCallContext, args environs.StartInstanceParams) ([]string, error) {
  1942  	id := args.InstanceConfig.MachineId
  1943  	b.mu.Lock()
  1944  	defer b.mu.Unlock()
  1945  	if derivedAZ, ok := b.derivedAZ[id]; ok {
  1946  		return derivedAZ, nil
  1947  	}
  1948  	return b.Environ.(providercommon.ZonedEnviron).DeriveAvailabilityZones(ctx, args)
  1949  }
  1950  
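// mockToolsFinder returns a single tools entry built from the requested
// version, series and architecture (defaulting to the host architecture),
// avoiding any real tools lookup.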
  1951  type mockToolsFinder struct {
  1952  }
  1953  
  1954  func (f mockToolsFinder) FindTools(number version.Number, series string, a string) (coretools.List, error) {
  1955  	v, err := version.ParseBinary(fmt.Sprintf("%s-%s-%s", number, series, arch.HostArch()))
  1956  	if err != nil {
  1957  		return nil, err
  1958  	}
  1959  	if a != "" {
  1960  		v.Arch = a
  1961  	}
  1962  	return coretools.List{&coretools.Tools{Version: v}}, nil
  1963  }
  1964  
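// mockAgent implements agent.Agent, returning a fixed agent.Config from
// CurrentConfig.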
  1965  type mockAgent struct {
  1966  	agent.Agent
  1967  	config agent.Config
  1968  }
  1969  
  1970  func (mock mockAgent) CurrentConfig() agent.Config {
  1971  	return mock.config
  1972  }