github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/provisioner/provisioner_test.go (about)

     1  // Copyright 2012, 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package provisioner_test
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/juju/errors"
    12  	"github.com/juju/names"
    13  	jc "github.com/juju/testing/checkers"
    14  	"github.com/juju/utils"
    15  	"github.com/juju/utils/set"
    16  	gc "launchpad.net/gocheck"
    17  
    18  	"github.com/juju/juju/constraints"
    19  	"github.com/juju/juju/environs"
    20  	"github.com/juju/juju/environs/config"
    21  	"github.com/juju/juju/environs/network"
    22  	"github.com/juju/juju/environs/simplestreams"
    23  	"github.com/juju/juju/environs/tools"
    24  	"github.com/juju/juju/instance"
    25  	"github.com/juju/juju/juju/testing"
    26  	"github.com/juju/juju/provider/dummy"
    27  	"github.com/juju/juju/state"
    28  	"github.com/juju/juju/state/api"
    29  	"github.com/juju/juju/state/api/params"
    30  	apiprovisioner "github.com/juju/juju/state/api/provisioner"
    31  	apiserverprovisioner "github.com/juju/juju/state/apiserver/provisioner"
    32  	coretesting "github.com/juju/juju/testing"
    33  	"github.com/juju/juju/worker/provisioner"
    34  )
    35  
// CommonProvisionerSuite bundles the fixtures shared by the provisioner
// tests: the embedded JujuConnSuite, the dummy provider operations channel,
// the environment configuration and an API connection for a machine agent.
type CommonProvisionerSuite struct {
	testing.JujuConnSuite
	// op receives the operations reported by the dummy provider (registered
	// with dummy.Listen in SetUpTest); cfg is the environment configuration
	// read from state in SetUpTest.
	op  <-chan dummy.Operation
	cfg *config.Config
	// defaultConstraints are used when adding a machine and then later in test assertions.
	defaultConstraints constraints.Value

	// st is the API connection opened by APILogin and provisioner is the
	// provisioner API obtained from that connection.
	st          *api.State
	provisioner *apiprovisioner.State
}
    46  
// ProvisionerSuite holds the environ provisioner tests. It embeds
// CommonProvisionerSuite and its SetUpTest additionally sets up an
// environment manager machine.
type ProvisionerSuite struct {
	CommonProvisionerSuite
}
    50  
// Register ProvisionerSuite with gocheck.
var _ = gc.Suite(&ProvisionerSuite{})
    52  
// veryShortAttempt is an attempt strategy with a total of one second and a
// delay of 80 milliseconds.
var veryShortAttempt = utils.AttemptStrategy{
	Total: 1 * time.Second,
	Delay: 80 * time.Millisecond,
}
    57  
    58  func (s *CommonProvisionerSuite) SetUpSuite(c *gc.C) {
    59  	s.JujuConnSuite.SetUpSuite(c)
    60  	s.defaultConstraints = constraints.MustParse("arch=amd64 mem=4G cpu-cores=1 root-disk=8G")
    61  }
    62  
    63  func (s *CommonProvisionerSuite) SetUpTest(c *gc.C) {
    64  	// Disable the default state policy, because the
    65  	// provisioner needs to be able to test pathological
    66  	// scenarios where a machine exists in state with
    67  	// invalid environment config.
    68  	dummy.SetStatePolicy(nil)
    69  
    70  	s.JujuConnSuite.SetUpTest(c)
    71  	// Create the operations channel with more than enough space
    72  	// for those tests that don't listen on it.
    73  	op := make(chan dummy.Operation, 500)
    74  	dummy.Listen(op)
    75  	s.op = op
    76  
    77  	cfg, err := s.State.EnvironConfig()
    78  	c.Assert(err, gc.IsNil)
    79  	s.cfg = cfg
    80  }
    81  
    82  func (s *CommonProvisionerSuite) APILogin(c *gc.C, machine *state.Machine) {
    83  	if s.st != nil {
    84  		c.Assert(s.st.Close(), gc.IsNil)
    85  	}
    86  	password, err := utils.RandomPassword()
    87  	c.Assert(err, gc.IsNil)
    88  	err = machine.SetPassword(password)
    89  	c.Assert(err, gc.IsNil)
    90  	err = machine.SetProvisioned("i-fake", "fake_nonce", nil)
    91  	c.Assert(err, gc.IsNil)
    92  	s.st = s.OpenAPIAsMachine(c, machine.Tag(), password, "fake_nonce")
    93  	c.Assert(s.st, gc.NotNil)
    94  	c.Logf("API: login as %q successful", machine.Tag())
    95  	s.provisioner = s.st.Provisioner()
    96  	c.Assert(s.provisioner, gc.NotNil)
    97  }
    98  
    99  // breakDummyProvider changes the environment config in state in a way
   100  // that causes the given environMethod of the dummy provider to return
   101  // an error, which is also returned as a message to be checked.
   102  func breakDummyProvider(c *gc.C, st *state.State, environMethod string) string {
   103  	attrs := map[string]interface{}{"broken": environMethod}
   104  	err := st.UpdateEnvironConfig(attrs, nil, nil)
   105  	c.Assert(err, gc.IsNil)
   106  	return fmt.Sprintf("dummy.%s is broken", environMethod)
   107  }
   108  
   109  // setupEnvironmentManager adds an environment manager machine and login to the API.
   110  func (s *CommonProvisionerSuite) setupEnvironmentManager(c *gc.C) {
   111  	machine, err := s.State.AddMachine("quantal", state.JobManageEnviron)
   112  	c.Assert(err, gc.IsNil)
   113  	c.Assert(machine.Id(), gc.Equals, "0")
   114  	err = machine.SetAddresses(instance.NewAddress("0.1.2.3", instance.NetworkUnknown))
   115  	c.Assert(err, gc.IsNil)
   116  	s.APILogin(c, machine)
   117  }
   118  
   119  // invalidateEnvironment alters the environment configuration
   120  // so the Settings returned from the watcher will not pass
   121  // validation.
   122  func (s *CommonProvisionerSuite) invalidateEnvironment(c *gc.C) {
   123  	st, err := state.Open(s.StateInfo(c), state.DefaultDialOpts(), state.Policy(nil))
   124  	c.Assert(err, gc.IsNil)
   125  	defer st.Close()
   126  	attrs := map[string]interface{}{"type": "unknown"}
   127  	err = st.UpdateEnvironConfig(attrs, nil, nil)
   128  	c.Assert(err, gc.IsNil)
   129  }
   130  
   131  // fixEnvironment undoes the work of invalidateEnvironment.
   132  func (s *CommonProvisionerSuite) fixEnvironment(c *gc.C) error {
   133  	st, err := state.Open(s.StateInfo(c), state.DefaultDialOpts(), state.Policy(nil))
   134  	c.Assert(err, gc.IsNil)
   135  	defer st.Close()
   136  	attrs := map[string]interface{}{"type": s.cfg.AllAttrs()["type"]}
   137  	return st.UpdateEnvironConfig(attrs, nil, nil)
   138  }
   139  
// stopper is anything that can be stopped, reporting an error if stopping
// failed. It is the argument type of the stop helper.
type stopper interface {
	Stop() error
}
   144  
   145  // stop stops a stopper.
   146  func stop(c *gc.C, s stopper) {
   147  	c.Assert(s.Stop(), gc.IsNil)
   148  }
   149  
   150  func (s *CommonProvisionerSuite) startUnknownInstance(c *gc.C, id string) instance.Instance {
   151  	instance, _ := testing.AssertStartInstance(c, s.Conn.Environ, id)
   152  	select {
   153  	case o := <-s.op:
   154  		switch o := o.(type) {
   155  		case dummy.OpStartInstance:
   156  		default:
   157  			c.Fatalf("unexpected operation %#v", o)
   158  		}
   159  	case <-time.After(coretesting.LongWait):
   160  		c.Fatalf("timed out waiting for startinstance operation")
   161  	}
   162  	return instance
   163  }
   164  
// checkStartInstance is checkStartInstanceCustom with the secret "pork", the
// suite's default constraints, no networks and no network info; it waits for
// the machine's instance id to be set.
func (s *CommonProvisionerSuite) checkStartInstance(c *gc.C, m *state.Machine) instance.Instance {
	return s.checkStartInstanceCustom(c, m, "pork", s.defaultConstraints, nil, nil, true)
}
   168  
   169  func (s *CommonProvisionerSuite) checkStartInstanceCustom(c *gc.C, m *state.Machine, secret string, cons constraints.Value, networks []string, networkInfo []network.Info, waitInstanceId bool) (inst instance.Instance) {
   170  	s.BackingState.StartSync()
   171  	for {
   172  		select {
   173  		case o := <-s.op:
   174  			switch o := o.(type) {
   175  			case dummy.OpStartInstance:
   176  				inst = o.Instance
   177  				if waitInstanceId {
   178  					s.waitInstanceId(c, m, inst.Id())
   179  				}
   180  
   181  				// Check the instance was started with the expected params.
   182  				c.Assert(o.MachineId, gc.Equals, m.Id())
   183  				nonceParts := strings.SplitN(o.MachineNonce, ":", 2)
   184  				c.Assert(nonceParts, gc.HasLen, 2)
   185  				c.Assert(nonceParts[0], gc.Equals, names.MachineTag("0"))
   186  				c.Assert(nonceParts[1], jc.Satisfies, utils.IsValidUUIDString)
   187  				c.Assert(o.Secret, gc.Equals, secret)
   188  				c.Assert(o.Networks, jc.DeepEquals, networks)
   189  				c.Assert(o.NetworkInfo, jc.DeepEquals, networkInfo)
   190  
   191  				// All provisioned machines in this test suite have
   192  				// their hardware characteristics attributes set to
   193  				// the same values as the constraints due to the dummy
   194  				// environment being used.
   195  				if !constraints.IsEmpty(&cons) {
   196  					c.Assert(o.Constraints, gc.DeepEquals, cons)
   197  					hc, err := m.HardwareCharacteristics()
   198  					c.Assert(err, gc.IsNil)
   199  					c.Assert(*hc, gc.DeepEquals, instance.HardwareCharacteristics{
   200  						Arch:     cons.Arch,
   201  						Mem:      cons.Mem,
   202  						RootDisk: cons.RootDisk,
   203  						CpuCores: cons.CpuCores,
   204  						CpuPower: cons.CpuPower,
   205  						Tags:     cons.Tags,
   206  					})
   207  				}
   208  				return
   209  			default:
   210  				c.Logf("ignoring unexpected operation %#v", o)
   211  			}
   212  		case <-time.After(2 * time.Second):
   213  			c.Fatalf("provisioner did not start an instance")
   214  			return
   215  		}
   216  	}
   217  	return
   218  }
   219  
   220  // checkNoOperations checks that the environ was not operated upon.
   221  func (s *CommonProvisionerSuite) checkNoOperations(c *gc.C) {
   222  	s.BackingState.StartSync()
   223  	select {
   224  	case o := <-s.op:
   225  		c.Fatalf("unexpected operation %#v", o)
   226  	case <-time.After(coretesting.ShortWait):
   227  		return
   228  	}
   229  }
   230  
// checkStopInstances checks that every one of the given instances has been
// stopped. It is checkStopSomeInstances with no instances to keep.
func (s *CommonProvisionerSuite) checkStopInstances(c *gc.C, instances ...instance.Instance) {
	s.checkStopSomeInstances(c, instances, nil)
}
   235  
   236  // checkStopSomeInstances checks that instancesToStop are stopped while instancesToKeep are not.
   237  func (s *CommonProvisionerSuite) checkStopSomeInstances(c *gc.C,
   238  	instancesToStop []instance.Instance, instancesToKeep []instance.Instance) {
   239  
   240  	s.BackingState.StartSync()
   241  	instanceIdsToStop := set.NewStrings()
   242  	for _, instance := range instancesToStop {
   243  		instanceIdsToStop.Add(string(instance.Id()))
   244  	}
   245  	instanceIdsToKeep := set.NewStrings()
   246  	for _, instance := range instancesToKeep {
   247  		instanceIdsToKeep.Add(string(instance.Id()))
   248  	}
   249  	// Continue checking for stop instance calls until all the instances we
   250  	// are waiting on to finish, actually finish, or we time out.
   251  	for !instanceIdsToStop.IsEmpty() {
   252  		select {
   253  		case o := <-s.op:
   254  			switch o := o.(type) {
   255  			case dummy.OpStopInstances:
   256  				for _, id := range o.Ids {
   257  					instId := string(id)
   258  					instanceIdsToStop.Remove(instId)
   259  					if instanceIdsToKeep.Contains(instId) {
   260  						c.Errorf("provisioner unexpectedly stopped instance %s", instId)
   261  					}
   262  				}
   263  			default:
   264  				c.Fatalf("unexpected operation %#v", o)
   265  				return
   266  			}
   267  		case <-time.After(2 * time.Second):
   268  			c.Fatalf("provisioner did not stop an instance")
   269  			return
   270  		}
   271  	}
   272  }
   273  
   274  func (s *CommonProvisionerSuite) waitMachine(c *gc.C, m *state.Machine, check func() bool) {
   275  	// TODO(jam): We need to grow a new method on NotifyWatcherC
   276  	// that calls StartSync while waiting for changes, then
   277  	// waitMachine and waitHardwareCharacteristics can use that
   278  	// instead
   279  	w := m.Watch()
   280  	defer stop(c, w)
   281  	timeout := time.After(coretesting.LongWait)
   282  	resync := time.After(0)
   283  	for {
   284  		select {
   285  		case <-w.Changes():
   286  			if check() {
   287  				return
   288  			}
   289  		case <-resync:
   290  			resync = time.After(coretesting.ShortWait)
   291  			s.BackingState.StartSync()
   292  		case <-timeout:
   293  			c.Fatalf("machine %v wait timed out", m)
   294  		}
   295  	}
   296  }
   297  
   298  func (s *CommonProvisionerSuite) waitHardwareCharacteristics(c *gc.C, m *state.Machine, check func() bool) {
   299  	w := m.WatchHardwareCharacteristics()
   300  	defer stop(c, w)
   301  	timeout := time.After(coretesting.LongWait)
   302  	resync := time.After(0)
   303  	for {
   304  		select {
   305  		case <-w.Changes():
   306  			if check() {
   307  				return
   308  			}
   309  		case <-resync:
   310  			resync = time.After(coretesting.ShortWait)
   311  			s.BackingState.StartSync()
   312  		case <-timeout:
   313  			c.Fatalf("hardware characteristics for machine %v wait timed out", m)
   314  		}
   315  	}
   316  }
   317  
   318  // waitRemoved waits for the supplied machine to be removed from state.
   319  func (s *CommonProvisionerSuite) waitRemoved(c *gc.C, m *state.Machine) {
   320  	s.waitMachine(c, m, func() bool {
   321  		err := m.Refresh()
   322  		if errors.IsNotFound(err) {
   323  			return true
   324  		}
   325  		c.Assert(err, gc.IsNil)
   326  		c.Logf("machine %v is still %s", m, m.Life())
   327  		return false
   328  	})
   329  }
   330  
   331  // waitInstanceId waits until the supplied machine has an instance id, then
   332  // asserts it is as expected.
   333  func (s *CommonProvisionerSuite) waitInstanceId(c *gc.C, m *state.Machine, expect instance.Id) {
   334  	s.waitHardwareCharacteristics(c, m, func() bool {
   335  		if actual, err := m.InstanceId(); err == nil {
   336  			c.Assert(actual, gc.Equals, expect)
   337  			return true
   338  		} else if !state.IsNotProvisionedError(err) {
   339  			// We don't expect any errors.
   340  			panic(err)
   341  		}
   342  		c.Logf("machine %v is still unprovisioned", m)
   343  		return false
   344  	})
   345  }
   346  
   347  func (s *CommonProvisionerSuite) newEnvironProvisioner(c *gc.C) provisioner.Provisioner {
   348  	machineTag := "machine-0"
   349  	agentConfig := s.AgentConfigForTag(c, machineTag)
   350  	return provisioner.NewEnvironProvisioner(s.provisioner, agentConfig)
   351  }
   352  
// addMachine adds a machine with the suite's default constraints and no
// requested networks.
func (s *CommonProvisionerSuite) addMachine() (*state.Machine, error) {
	return s.addMachineWithRequestedNetworks(nil, s.defaultConstraints)
}
   356  
   357  func (s *CommonProvisionerSuite) addMachineWithRequestedNetworks(networks []string, cons constraints.Value) (*state.Machine, error) {
   358  	return s.BackingState.AddOneMachine(state.MachineTemplate{
   359  		Series:            coretesting.FakeDefaultSeries,
   360  		Jobs:              []state.MachineJob{state.JobHostUnits},
   361  		Constraints:       cons,
   362  		RequestedNetworks: networks,
   363  	})
   364  }
   365  
   366  func (s *ProvisionerSuite) SetUpTest(c *gc.C) {
   367  	s.CommonProvisionerSuite.SetUpTest(c)
   368  	s.CommonProvisionerSuite.setupEnvironmentManager(c)
   369  }
   370  
   371  func (s *ProvisionerSuite) TestProvisionerStartStop(c *gc.C) {
   372  	p := s.newEnvironProvisioner(c)
   373  	c.Assert(p.Stop(), gc.IsNil)
   374  }
   375  
   376  func (s *ProvisionerSuite) TestSimple(c *gc.C) {
   377  	p := s.newEnvironProvisioner(c)
   378  	defer stop(c, p)
   379  
   380  	// Check that an instance is provisioned when the machine is created...
   381  	m, err := s.addMachine()
   382  	c.Assert(err, gc.IsNil)
   383  	instance := s.checkStartInstance(c, m)
   384  
   385  	// ...and removed, along with the machine, when the machine is Dead.
   386  	c.Assert(m.EnsureDead(), gc.IsNil)
   387  	s.checkStopInstances(c, instance)
   388  	s.waitRemoved(c, m)
   389  }
   390  
   391  func (s *ProvisionerSuite) TestConstraints(c *gc.C) {
   392  	// Create a machine with non-standard constraints.
   393  	m, err := s.addMachine()
   394  	c.Assert(err, gc.IsNil)
   395  	cons := constraints.MustParse("mem=8G arch=amd64 cpu-cores=2 root-disk=10G")
   396  	err = m.SetConstraints(cons)
   397  	c.Assert(err, gc.IsNil)
   398  
   399  	// Start a provisioner and check those constraints are used.
   400  	p := s.newEnvironProvisioner(c)
   401  	defer stop(c, p)
   402  	s.checkStartInstanceCustom(c, m, "pork", cons, nil, nil, true)
   403  }
   404  
   405  func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenNoToolsAreAvailable(c *gc.C) {
   406  	p := s.newEnvironProvisioner(c)
   407  	defer stop(c, p)
   408  
   409  	// Check that an instance is not provisioned when the machine is created...
   410  	m, err := s.BackingState.AddOneMachine(state.MachineTemplate{
   411  		// We need a valid series that has no tools uploaded
   412  		Series:      "raring",
   413  		Jobs:        []state.MachineJob{state.JobHostUnits},
   414  		Constraints: s.defaultConstraints,
   415  	})
   416  	c.Assert(err, gc.IsNil)
   417  	s.checkNoOperations(c)
   418  
   419  	t0 := time.Now()
   420  	for time.Since(t0) < coretesting.LongWait {
   421  		// And check the machine status is set to error.
   422  		status, info, _, err := m.Status()
   423  		c.Assert(err, gc.IsNil)
   424  		if status == params.StatusPending {
   425  			time.Sleep(coretesting.ShortWait)
   426  			continue
   427  		}
   428  		c.Assert(status, gc.Equals, params.StatusError)
   429  		c.Assert(info, gc.Equals, "no matching tools available")
   430  		break
   431  	}
   432  
   433  	// Restart the PA to make sure the machine is skipped again.
   434  	stop(c, p)
   435  	p = s.newEnvironProvisioner(c)
   436  	defer stop(c, p)
   437  	s.checkNoOperations(c)
   438  }
   439  
   440  func (s *ProvisionerSuite) TestProvisionerSetsErrorStatusWhenStartInstanceFailed(c *gc.C) {
   441  	brokenMsg := breakDummyProvider(c, s.State, "StartInstance")
   442  	p := s.newEnvironProvisioner(c)
   443  	defer stop(c, p)
   444  
   445  	// Check that an instance is not provisioned when the machine is created...
   446  	m, err := s.addMachine()
   447  	c.Assert(err, gc.IsNil)
   448  	s.checkNoOperations(c)
   449  
   450  	t0 := time.Now()
   451  	for time.Since(t0) < coretesting.LongWait {
   452  		// And check the machine status is set to error.
   453  		status, info, _, err := m.Status()
   454  		c.Assert(err, gc.IsNil)
   455  		if status == params.StatusPending {
   456  			time.Sleep(coretesting.ShortWait)
   457  			continue
   458  		}
   459  		c.Assert(status, gc.Equals, params.StatusError)
   460  		c.Assert(info, gc.Equals, brokenMsg)
   461  		break
   462  	}
   463  
   464  	// Unbreak the environ config.
   465  	err = s.fixEnvironment(c)
   466  	c.Assert(err, gc.IsNil)
   467  
   468  	// Restart the PA to make sure the machine is skipped again.
   469  	stop(c, p)
   470  	p = s.newEnvironProvisioner(c)
   471  	defer stop(c, p)
   472  	s.checkNoOperations(c)
   473  }
   474  
   475  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurForContainers(c *gc.C) {
   476  	p := s.newEnvironProvisioner(c)
   477  	defer stop(c, p)
   478  
   479  	// create a machine to host the container.
   480  	m, err := s.addMachine()
   481  	c.Assert(err, gc.IsNil)
   482  	inst := s.checkStartInstance(c, m)
   483  
   484  	// make a container on the machine we just created
   485  	template := state.MachineTemplate{
   486  		Series: coretesting.FakeDefaultSeries,
   487  		Jobs:   []state.MachineJob{state.JobHostUnits},
   488  	}
   489  	container, err := s.State.AddMachineInsideMachine(template, m.Id(), instance.LXC)
   490  	c.Assert(err, gc.IsNil)
   491  
   492  	// the PA should not attempt to create it
   493  	s.checkNoOperations(c)
   494  
   495  	// cleanup
   496  	c.Assert(container.EnsureDead(), gc.IsNil)
   497  	c.Assert(container.Remove(), gc.IsNil)
   498  	c.Assert(m.EnsureDead(), gc.IsNil)
   499  	s.checkStopInstances(c, inst)
   500  	s.waitRemoved(c, m)
   501  }
   502  
   503  func (s *ProvisionerSuite) TestProvisioningMachinesWithRequestedNetworks(c *gc.C) {
   504  	p := s.newEnvironProvisioner(c)
   505  	defer stop(c, p)
   506  
   507  	// Add and provision a machine with networks specified.
   508  	requestedNetworks := []string{"net1", "net2"}
   509  	cons := constraints.MustParse(s.defaultConstraints.String(), "networks=^net3,^net4")
   510  	expectNetworkInfo := []network.Info{{
   511  		MACAddress:    "aa:bb:cc:dd:ee:f0",
   512  		InterfaceName: "eth0",
   513  		ProviderId:    "net1",
   514  		NetworkName:   "net1",
   515  		VLANTag:       0,
   516  		CIDR:          "0.1.2.0/24",
   517  		IsVirtual:     false,
   518  	}, {
   519  		MACAddress:    "aa:bb:cc:dd:ee:f1",
   520  		InterfaceName: "eth1",
   521  		ProviderId:    "net2",
   522  		NetworkName:   "net2",
   523  		VLANTag:       1,
   524  		CIDR:          "0.2.2.0/24",
   525  		IsVirtual:     true,
   526  	}}
   527  	m, err := s.addMachineWithRequestedNetworks(requestedNetworks, cons)
   528  	c.Assert(err, gc.IsNil)
   529  	inst := s.checkStartInstanceCustom(
   530  		c, m, "pork", cons,
   531  		requestedNetworks,
   532  		expectNetworkInfo, true)
   533  
   534  	_, err = s.State.Network("net1")
   535  	c.Assert(err, gc.IsNil)
   536  	_, err = s.State.Network("net2")
   537  	c.Assert(err, gc.IsNil)
   538  	_, err = s.State.Network("net3")
   539  	c.Assert(err, jc.Satisfies, errors.IsNotFound)
   540  	_, err = s.State.Network("net4")
   541  	c.Assert(err, jc.Satisfies, errors.IsNotFound)
   542  	ifaces, err := m.NetworkInterfaces()
   543  	c.Assert(err, gc.IsNil)
   544  	c.Assert(ifaces, gc.HasLen, 2)
   545  
   546  	// Cleanup.
   547  	c.Assert(m.EnsureDead(), gc.IsNil)
   548  	s.checkStopInstances(c, inst)
   549  	s.waitRemoved(c, m)
   550  }
   551  
   552  func (s *ProvisionerSuite) TestSetInstanceInfoFailureSetsErrorStatusAndStopsInstanceButKeepsGoing(c *gc.C) {
   553  	p := s.newEnvironProvisioner(c)
   554  	defer stop(c, p)
   555  
   556  	// Add and provision a machine with networks specified.
   557  	networks := []string{"bad-net1"}
   558  	// "bad-" prefix for networks causes dummy provider to report
   559  	// invalid network.Info.
   560  	expectNetworkInfo := []network.Info{
   561  		{ProviderId: "bad-net1", NetworkName: "bad-net1", CIDR: "invalid"},
   562  	}
   563  	m, err := s.addMachineWithRequestedNetworks(networks, constraints.Value{})
   564  	c.Assert(err, gc.IsNil)
   565  	inst := s.checkStartInstanceCustom(
   566  		c, m, "pork", constraints.Value{},
   567  		networks, expectNetworkInfo, false)
   568  
   569  	// Ensure machine error status was set.
   570  	t0 := time.Now()
   571  	for time.Since(t0) < coretesting.LongWait {
   572  		// And check the machine status is set to error.
   573  		status, info, _, err := m.Status()
   574  		c.Assert(err, gc.IsNil)
   575  		if status == params.StatusPending {
   576  			time.Sleep(coretesting.ShortWait)
   577  			continue
   578  		}
   579  		c.Assert(status, gc.Equals, params.StatusError)
   580  		c.Assert(info, gc.Matches, `aborted instance "dummyenv-0": cannot add network "bad-net1": invalid CIDR address: invalid`)
   581  		break
   582  	}
   583  	s.checkStopInstances(c, inst)
   584  
   585  	// Make sure the task didn't stop with an error
   586  	died := make(chan error)
   587  	go func() {
   588  		died <- p.Wait()
   589  	}()
   590  	select {
   591  	case <-time.After(coretesting.LongWait):
   592  	case err = <-died:
   593  		c.Fatalf("provisioner task died unexpectedly with err: %v", err)
   594  	}
   595  
   596  	// Restart the PA to make sure the machine is not retried.
   597  	stop(c, p)
   598  	p = s.newEnvironProvisioner(c)
   599  	defer stop(c, p)
   600  
   601  	s.checkNoOperations(c)
   602  }
   603  
   604  func (s *ProvisionerSuite) TestProvisioningDoesNotOccurWithAnInvalidEnvironment(c *gc.C) {
   605  	s.invalidateEnvironment(c)
   606  
   607  	p := s.newEnvironProvisioner(c)
   608  	defer stop(c, p)
   609  
   610  	// try to create a machine
   611  	_, err := s.addMachine()
   612  	c.Assert(err, gc.IsNil)
   613  
   614  	// the PA should not create it
   615  	s.checkNoOperations(c)
   616  }
   617  
   618  func (s *ProvisionerSuite) TestProvisioningOccursWithFixedEnvironment(c *gc.C) {
   619  	s.invalidateEnvironment(c)
   620  
   621  	p := s.newEnvironProvisioner(c)
   622  	defer stop(c, p)
   623  
   624  	// try to create a machine
   625  	m, err := s.addMachine()
   626  	c.Assert(err, gc.IsNil)
   627  
   628  	// the PA should not create it
   629  	s.checkNoOperations(c)
   630  
   631  	err = s.fixEnvironment(c)
   632  	c.Assert(err, gc.IsNil)
   633  
   634  	s.checkStartInstance(c, m)
   635  }
   636  
   637  func (s *ProvisionerSuite) TestProvisioningDoesOccurAfterInvalidEnvironmentPublished(c *gc.C) {
   638  	p := s.newEnvironProvisioner(c)
   639  	defer stop(c, p)
   640  
   641  	// place a new machine into the state
   642  	m, err := s.addMachine()
   643  	c.Assert(err, gc.IsNil)
   644  
   645  	s.checkStartInstance(c, m)
   646  
   647  	s.invalidateEnvironment(c)
   648  
   649  	// create a second machine
   650  	m, err = s.addMachine()
   651  	c.Assert(err, gc.IsNil)
   652  
   653  	// the PA should create it using the old environment
   654  	s.checkStartInstance(c, m)
   655  }
   656  
   657  func (s *ProvisionerSuite) TestProvisioningDoesNotProvisionTheSameMachineAfterRestart(c *gc.C) {
   658  	p := s.newEnvironProvisioner(c)
   659  	defer stop(c, p)
   660  
   661  	// create a machine
   662  	m, err := s.addMachine()
   663  	c.Assert(err, gc.IsNil)
   664  	s.checkStartInstance(c, m)
   665  
   666  	// restart the PA
   667  	stop(c, p)
   668  	p = s.newEnvironProvisioner(c)
   669  	defer stop(c, p)
   670  
   671  	// check that there is only one machine provisioned.
   672  	machines, err := s.State.AllMachines()
   673  	c.Assert(err, gc.IsNil)
   674  	c.Check(len(machines), gc.Equals, 2)
   675  	c.Check(machines[0].Id(), gc.Equals, "0")
   676  	c.Check(machines[1].CheckProvisioned("fake_nonce"), jc.IsFalse)
   677  
   678  	// the PA should not create it a second time
   679  	s.checkNoOperations(c)
   680  }
   681  
   682  func (s *ProvisionerSuite) TestProvisioningStopsInstances(c *gc.C) {
   683  	p := s.newEnvironProvisioner(c)
   684  	defer stop(c, p)
   685  
   686  	// create a machine
   687  	m0, err := s.addMachine()
   688  	c.Assert(err, gc.IsNil)
   689  	i0 := s.checkStartInstance(c, m0)
   690  
   691  	// create a second machine
   692  	m1, err := s.addMachine()
   693  	c.Assert(err, gc.IsNil)
   694  	i1 := s.checkStartInstance(c, m1)
   695  	stop(c, p)
   696  
   697  	// mark the first machine as dead
   698  	c.Assert(m0.EnsureDead(), gc.IsNil)
   699  
   700  	// remove the second machine entirely
   701  	c.Assert(m1.EnsureDead(), gc.IsNil)
   702  	c.Assert(m1.Remove(), gc.IsNil)
   703  
   704  	// start a new provisioner to shut them both down
   705  	p = s.newEnvironProvisioner(c)
   706  	defer stop(c, p)
   707  	s.checkStopInstances(c, i0, i1)
   708  	s.waitRemoved(c, m0)
   709  }
   710  
   711  func (s *ProvisionerSuite) TestDyingMachines(c *gc.C) {
   712  	p := s.newEnvironProvisioner(c)
   713  	defer stop(c, p)
   714  
   715  	// provision a machine
   716  	m0, err := s.addMachine()
   717  	c.Assert(err, gc.IsNil)
   718  	s.checkStartInstance(c, m0)
   719  
   720  	// stop the provisioner and make the machine dying
   721  	stop(c, p)
   722  	err = m0.Destroy()
   723  	c.Assert(err, gc.IsNil)
   724  
   725  	// add a new, dying, unprovisioned machine
   726  	m1, err := s.addMachine()
   727  	c.Assert(err, gc.IsNil)
   728  	err = m1.Destroy()
   729  	c.Assert(err, gc.IsNil)
   730  
   731  	// start the provisioner and wait for it to reap the useless machine
   732  	p = s.newEnvironProvisioner(c)
   733  	defer stop(c, p)
   734  	s.checkNoOperations(c)
   735  	s.waitRemoved(c, m1)
   736  
   737  	// verify the other one's still fine
   738  	err = m0.Refresh()
   739  	c.Assert(err, gc.IsNil)
   740  	c.Assert(m0.Life(), gc.Equals, state.Dying)
   741  }
   742  
   743  func (s *ProvisionerSuite) TestProvisioningRecoversAfterInvalidEnvironmentPublished(c *gc.C) {
   744  	p := s.newEnvironProvisioner(c)
   745  	defer stop(c, p)
   746  
   747  	// place a new machine into the state
   748  	m, err := s.addMachine()
   749  	c.Assert(err, gc.IsNil)
   750  	s.checkStartInstance(c, m)
   751  
   752  	s.invalidateEnvironment(c)
   753  	s.BackingState.StartSync()
   754  
   755  	// create a second machine
   756  	m, err = s.addMachine()
   757  	c.Assert(err, gc.IsNil)
   758  
   759  	// the PA should create it using the old environment
   760  	s.checkStartInstance(c, m)
   761  
   762  	err = s.fixEnvironment(c)
   763  	c.Assert(err, gc.IsNil)
   764  
   765  	// insert our observer
   766  	cfgObserver := make(chan *config.Config, 1)
   767  	provisioner.SetObserver(p, cfgObserver)
   768  
   769  	err = s.State.UpdateEnvironConfig(map[string]interface{}{"secret": "beef"}, nil, nil)
   770  	c.Assert(err, gc.IsNil)
   771  
   772  	s.BackingState.StartSync()
   773  
   774  	// wait for the PA to load the new configuration
   775  	select {
   776  	case <-cfgObserver:
   777  	case <-time.After(coretesting.LongWait):
   778  		c.Fatalf("PA did not action config change")
   779  	}
   780  
   781  	// create a third machine
   782  	m, err = s.addMachine()
   783  	c.Assert(err, gc.IsNil)
   784  
   785  	// the PA should create it using the new environment
   786  	s.checkStartInstanceCustom(c, m, "beef", s.defaultConstraints, nil, nil, true)
   787  }
   788  
   789  func (s *ProvisionerSuite) TestProvisioningSafeMode(c *gc.C) {
   790  	p := s.newEnvironProvisioner(c)
   791  	defer stop(c, p)
   792  
   793  	// create a machine
   794  	m0, err := s.addMachine()
   795  	c.Assert(err, gc.IsNil)
   796  	i0 := s.checkStartInstance(c, m0)
   797  
   798  	// create a second machine
   799  	m1, err := s.addMachine()
   800  	c.Assert(err, gc.IsNil)
   801  	i1 := s.checkStartInstance(c, m1)
   802  	stop(c, p)
   803  
   804  	// mark the first machine as dead
   805  	c.Assert(m0.EnsureDead(), gc.IsNil)
   806  
   807  	// remove the second machine entirely from state
   808  	c.Assert(m1.EnsureDead(), gc.IsNil)
   809  	c.Assert(m1.Remove(), gc.IsNil)
   810  
   811  	// turn on safe mode
   812  	attrs := map[string]interface{}{"provisioner-safe-mode": true}
   813  	err = s.State.UpdateEnvironConfig(attrs, nil, nil)
   814  	c.Assert(err, gc.IsNil)
   815  
   816  	// start a new provisioner to shut down only the machine still in state.
   817  	p = s.newEnvironProvisioner(c)
   818  	defer stop(c, p)
   819  	s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1})
   820  	s.waitRemoved(c, m0)
   821  }
   822  
   823  func (s *ProvisionerSuite) TestProvisioningSafeModeChange(c *gc.C) {
   824  	p := s.newEnvironProvisioner(c)
   825  	defer stop(c, p)
   826  
   827  	// First check that safe mode is initially off.
   828  
   829  	// create a machine
   830  	m0, err := s.addMachine()
   831  	c.Assert(err, gc.IsNil)
   832  	i0 := s.checkStartInstance(c, m0)
   833  
   834  	// create a second machine
   835  	m1, err := s.addMachine()
   836  	c.Assert(err, gc.IsNil)
   837  	i1 := s.checkStartInstance(c, m1)
   838  
   839  	// mark the first machine as dead
   840  	c.Assert(m0.EnsureDead(), gc.IsNil)
   841  
   842  	// remove the second machine entirely from state
   843  	c.Assert(m1.EnsureDead(), gc.IsNil)
   844  	c.Assert(m1.Remove(), gc.IsNil)
   845  
   846  	s.checkStopInstances(c, i0, i1)
   847  	s.waitRemoved(c, m0)
   848  
   849  	// insert our observer
   850  	cfgObserver := make(chan *config.Config, 1)
   851  	provisioner.SetObserver(p, cfgObserver)
   852  
   853  	// turn on safe mode
   854  	attrs := map[string]interface{}{"provisioner-safe-mode": true}
   855  	err = s.State.UpdateEnvironConfig(attrs, nil, nil)
   856  	c.Assert(err, gc.IsNil)
   857  
   858  	s.BackingState.StartSync()
   859  
   860  	// wait for the PA to load the new configuration
   861  	select {
   862  	case <-cfgObserver:
   863  	case <-time.After(coretesting.LongWait):
   864  		c.Fatalf("PA did not action config change")
   865  	}
   866  
   867  	// Now check that the provisioner has noticed safe mode is on.
   868  
   869  	// create a machine
   870  	m3, err := s.addMachine()
   871  	c.Assert(err, gc.IsNil)
   872  	i3 := s.checkStartInstance(c, m3)
   873  
   874  	// create an instance out of band
   875  	i4 := s.startUnknownInstance(c, "999")
   876  
   877  	// mark the machine as dead
   878  	c.Assert(m3.EnsureDead(), gc.IsNil)
   879  
   880  	// check the machine's instance is stopped, and the other isn't
   881  	s.checkStopSomeInstances(c, []instance.Instance{i3}, []instance.Instance{i4})
   882  	s.waitRemoved(c, m3)
   883  }
   884  
   885  func (s *ProvisionerSuite) newProvisionerTask(c *gc.C, safeMode bool, broker environs.InstanceBroker) provisioner.ProvisionerTask {
   886  	machineWatcher, err := s.provisioner.WatchEnvironMachines()
   887  	c.Assert(err, gc.IsNil)
   888  	retryWatcher, err := s.provisioner.WatchMachineErrorRetry()
   889  	c.Assert(err, gc.IsNil)
   890  	auth, err := environs.NewAPIAuthenticator(s.provisioner)
   891  	c.Assert(err, gc.IsNil)
   892  	return provisioner.NewProvisionerTask(
   893  		"machine-0", safeMode, s.provisioner,
   894  		machineWatcher, retryWatcher, broker, auth)
   895  }
   896  
   897  func (s *ProvisionerSuite) TestTurningOffSafeModeReapsUnknownInstances(c *gc.C) {
   898  	task := s.newProvisionerTask(c, true, s.APIConn.Environ)
   899  	defer stop(c, task)
   900  
   901  	// Initially create a machine, and an unknown instance, with safe mode on.
   902  	m0, err := s.addMachine()
   903  	c.Assert(err, gc.IsNil)
   904  	i0 := s.checkStartInstance(c, m0)
   905  	i1 := s.startUnknownInstance(c, "999")
   906  
   907  	// mark the first machine as dead
   908  	c.Assert(m0.EnsureDead(), gc.IsNil)
   909  
   910  	// with safe mode on, only one of the machines is stopped.
   911  	s.checkStopSomeInstances(c, []instance.Instance{i0}, []instance.Instance{i1})
   912  	s.waitRemoved(c, m0)
   913  
   914  	// turn off safe mode and check that the other machine is now stopped also.
   915  	task.SetSafeMode(false)
   916  	s.checkStopInstances(c, i1)
   917  }
   918  
   919  func (s *ProvisionerSuite) TestProvisionerRetriesTransientErrors(c *gc.C) {
   920  	s.PatchValue(&apiserverprovisioner.ErrorRetryWaitDelay, 5*time.Millisecond)
   921  	var e environs.Environ = &mockBroker{Environ: s.APIConn.Environ, retryCount: make(map[string]int)}
   922  	task := s.newProvisionerTask(c, false, e)
   923  	defer stop(c, task)
   924  
   925  	// Provision some machines, some will be started first time,
   926  	// another will require retries.
   927  	m1, err := s.addMachine()
   928  	c.Assert(err, gc.IsNil)
   929  	s.checkStartInstance(c, m1)
   930  	m2, err := s.addMachine()
   931  	c.Assert(err, gc.IsNil)
   932  	s.checkStartInstance(c, m2)
   933  	m3, err := s.addMachine()
   934  	c.Assert(err, gc.IsNil)
   935  	m4, err := s.addMachine()
   936  	c.Assert(err, gc.IsNil)
   937  
   938  	// mockBroker will fail to start machine-3 several times;
   939  	// keep setting the transient flag to retry until the
   940  	// instance has started.
   941  	thatsAllFolks := make(chan struct{})
   942  	go func() {
   943  		for {
   944  			select {
   945  			case <-thatsAllFolks:
   946  				return
   947  			case <-time.After(coretesting.ShortWait):
   948  				err := m3.SetStatus(params.StatusError, "info", params.StatusData{"transient": true})
   949  				c.Assert(err, gc.IsNil)
   950  			}
   951  		}
   952  	}()
   953  	s.checkStartInstance(c, m3)
   954  	close(thatsAllFolks)
   955  
   956  	// Machine 4 is never provisioned.
   957  	status, _, _, err := m4.Status()
   958  	c.Assert(err, gc.IsNil)
   959  	c.Assert(status, gc.Equals, params.StatusError)
   960  	_, err = m4.InstanceId()
   961  	c.Assert(err, jc.Satisfies, state.IsNotProvisionedError)
   962  }
   963  
   964  type mockBroker struct {
   965  	environs.Environ
   966  	retryCount map[string]int
   967  }
   968  
   969  func (b *mockBroker) StartInstance(args environs.StartInstanceParams) (instance.Instance, *instance.HardwareCharacteristics, []network.Info, error) {
   970  	// All machines except machines 3, 4 are provisioned successfully the first time.
   971  	// Machines 3 is provisioned after some attempts have been made.
   972  	// Machine 4 is never provisioned.
   973  	id := args.MachineConfig.MachineId
   974  	retries := b.retryCount[id]
   975  	if (id != "3" && id != "4") || retries > 2 {
   976  		return b.Environ.StartInstance(args)
   977  	} else {
   978  		b.retryCount[id] = retries + 1
   979  	}
   980  	return nil, nil, nil, fmt.Errorf("error: some error")
   981  }
   982  
   983  func (b *mockBroker) GetToolsSources() ([]simplestreams.DataSource, error) {
   984  	return b.Environ.(tools.SupportsCustomSources).GetToolsSources()
   985  }