github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/provider/ec2/environ.go (about)

     1  // Copyright 2011-2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package ec2
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  	"net"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/juju/clock"
    15  	"github.com/juju/collections/set"
    16  	"github.com/juju/errors"
    17  	"github.com/juju/retry"
    18  	"github.com/juju/utils"
    19  	"github.com/juju/version"
    20  	"gopkg.in/amz.v3/aws"
    21  	"gopkg.in/amz.v3/ec2"
    22  	"gopkg.in/juju/names.v2"
    23  
    24  	"github.com/juju/juju/cloudconfig/instancecfg"
    25  	"github.com/juju/juju/cloudconfig/providerinit"
    26  	"github.com/juju/juju/core/constraints"
    27  	"github.com/juju/juju/core/instance"
    28  	"github.com/juju/juju/core/status"
    29  	"github.com/juju/juju/environs"
    30  	"github.com/juju/juju/environs/config"
    31  	"github.com/juju/juju/environs/context"
    32  	"github.com/juju/juju/environs/instances"
    33  	"github.com/juju/juju/environs/simplestreams"
    34  	"github.com/juju/juju/environs/tags"
    35  	"github.com/juju/juju/network"
    36  	"github.com/juju/juju/provider/common"
    37  	"github.com/juju/juju/provider/ec2/internal/ec2instancetypes"
    38  	"github.com/juju/juju/storage"
    39  	"github.com/juju/juju/tools"
    40  )
    41  
const (
	// invalidParameterValue holds the literal "InvalidParameterValue".
	// NOTE(review): presumably an EC2 API error code that is matched
	// elsewhere in the package — confirm against its users.
	invalidParameterValue = "InvalidParameterValue"

	// tagName is the AWS-specific tag key that populates resources'
	// name columns in the console.
	tagName = "Name"
)
    49  
var (
	// Use shortAttempt to poll for short-term events or for retrying API calls.
	// It retries for up to 5 seconds in total, waiting 200ms between attempts.
	// TODO(katco): 2016-08-09: lp:1611427
	shortAttempt = utils.AttemptStrategy{
		Total: 5 * time.Second,
		Delay: 200 * time.Millisecond,
	}

	// aliveInstanceStates are the states which we filter by when listing
	// instances in an environment.
	aliveInstanceStates = []string{"pending", "running"}
)
    62  
// environ is the EC2 provider's environment type. It holds the cloud
// specification, the EC2 API client and a handful of mutex-guarded values.
type environ struct {
	// name is the value returned by Name.
	name string
	// cloud is the cloud specification; its Region and Endpoint are read
	// when talking to EC2 and when looking up simplestreams metadata.
	cloud environs.CloudSpec
	// ec2 is the client used for EC2 API calls.
	ec2 *ec2.EC2

	// ecfgMutex protects the *Unlocked fields below.
	ecfgMutex sync.Mutex
	// ecfgUnlocked is the current provider configuration. Read it through
	// ecfg and replace it through SetConfig, both of which take ecfgMutex.
	ecfgUnlocked *environConfig

	// availabilityZonesMutex guards availabilityZones.
	availabilityZonesMutex sync.Mutex
	// availabilityZones is filled by the first successful call to
	// AvailabilityZones and returned as-is by later calls.
	availabilityZones []common.AvailabilityZone

	// NOTE(review): the defaultVPC* fields are not used in this part of the
	// file; presumably they cache the default VPC lookup — confirm.
	defaultVPCMutex   sync.Mutex
	defaultVPCChecked bool
	defaultVPC        *ec2.VPC

	// NOTE(review): presumably serialises security group creation; the
	// user of this mutex is not visible here — confirm.
	ensureGroupMutex sync.Mutex
}
    81  
// Compile-time checks that *environ implements the interfaces it is
// expected to satisfy.
var _ environs.Environ = (*environ)(nil)
var _ environs.Networking = (*environ)(nil)
    84  
    85  func (e *environ) Config() *config.Config {
    86  	return e.ecfg().Config
    87  }
    88  
    89  func (e *environ) SetConfig(cfg *config.Config) error {
    90  	ecfg, err := providerInstance.newConfig(cfg)
    91  	if err != nil {
    92  		return errors.Trace(err)
    93  	}
    94  	e.ecfgMutex.Lock()
    95  	e.ecfgUnlocked = ecfg
    96  	e.ecfgMutex.Unlock()
    97  	return nil
    98  }
    99  
   100  func (e *environ) ecfg() *environConfig {
   101  	e.ecfgMutex.Lock()
   102  	ecfg := e.ecfgUnlocked
   103  	e.ecfgMutex.Unlock()
   104  	return ecfg
   105  }
   106  
// Name returns the name stored on the environ.
func (e *environ) Name() string {
	return e.name
}
   110  
   111  // PrepareForBootstrap is part of the Environ interface.
   112  func (env *environ) PrepareForBootstrap(ctx environs.BootstrapContext) error {
   113  	callCtx := context.NewCloudCallContext()
   114  	// Cannot really invalidate a credential here since nothing is bootstrapped yet.
   115  	callCtx.InvalidateCredentialFunc = func(string) error { return nil }
   116  	if ctx.ShouldVerifyCredentials() {
   117  		if err := verifyCredentials(env, callCtx); err != nil {
   118  			return err
   119  		}
   120  	}
   121  	ecfg := env.ecfg()
   122  	vpcID, forceVPCID := ecfg.vpcID(), ecfg.forceVPCID()
   123  	if err := validateBootstrapVPC(env.ec2, callCtx, env.cloud.Region, vpcID, forceVPCID, ctx); err != nil {
   124  		return errors.Trace(maybeConvertCredentialError(err, callCtx))
   125  	}
   126  	return nil
   127  }
   128  
   129  // Create is part of the Environ interface.
   130  func (env *environ) Create(ctx context.ProviderCallContext, args environs.CreateParams) error {
   131  	if err := verifyCredentials(env, ctx); err != nil {
   132  		return err
   133  	}
   134  	vpcID := env.ecfg().vpcID()
   135  	if err := validateModelVPC(env.ec2, ctx, env.name, vpcID); err != nil {
   136  		return errors.Trace(maybeConvertCredentialError(err, ctx))
   137  	}
   138  	// TODO(axw) 2016-08-04 #1609643
   139  	// Create global security group(s) here.
   140  	return nil
   141  }
   142  
   143  // Bootstrap is part of the Environ interface.
   144  func (e *environ) Bootstrap(ctx environs.BootstrapContext, callCtx context.ProviderCallContext, args environs.BootstrapParams) (*environs.BootstrapResult, error) {
   145  	r, err := common.Bootstrap(ctx, e, callCtx, args)
   146  	return r, maybeConvertCredentialError(err, callCtx)
   147  }
   148  
// SupportsSpaces is specified on environs.Networking.
// It always reports true with a nil error.
func (e *environ) SupportsSpaces(ctx context.ProviderCallContext) (bool, error) {
	return true, nil
}
   153  
// SupportsContainerAddresses is specified on environs.Networking.
// It always reports false together with a NotSupported error.
func (e *environ) SupportsContainerAddresses(ctx context.ProviderCallContext) (bool, error) {
	return false, errors.NotSupportedf("container address allocation")
}
   158  
// SupportsSpaceDiscovery is specified on environs.Networking.
// It always reports false with a nil error.
func (e *environ) SupportsSpaceDiscovery(ctx context.ProviderCallContext) (bool, error) {
	return false, nil
}
   163  
// unsupportedConstraints lists the constraints that ConstraintsValidator
// registers as unsupported.
var unsupportedConstraints = []string{
	constraints.Tags,
	// TODO(anastasiamac 2016-03-16) LP#1557874
	// use virt-type in StartInstances
	constraints.VirtType,
}
   170  
   171  // ConstraintsValidator is defined on the Environs interface.
   172  func (e *environ) ConstraintsValidator(ctx context.ProviderCallContext) (constraints.Validator, error) {
   173  	validator := constraints.NewValidator()
   174  	validator.RegisterConflicts(
   175  		[]string{constraints.InstanceType},
   176  		[]string{constraints.Mem, constraints.Cores, constraints.CpuPower})
   177  	validator.RegisterUnsupported(unsupportedConstraints)
   178  	instanceTypes, err := e.supportedInstanceTypes(ctx)
   179  	if err != nil {
   180  		return nil, errors.Trace(err)
   181  	}
   182  	instTypeNames := make([]string, len(instanceTypes))
   183  	for i, itype := range instanceTypes {
   184  		instTypeNames[i] = itype.Name
   185  	}
   186  	validator.RegisterVocabulary(constraints.InstanceType, instTypeNames)
   187  	return validator, nil
   188  }
   189  
   190  func archMatches(arches []string, arch *string) bool {
   191  	if arch == nil {
   192  		return true
   193  	}
   194  	for _, a := range arches {
   195  		if a == *arch {
   196  			return true
   197  		}
   198  	}
   199  	return false
   200  }
   201  
// ec2AvailabilityZones is the function AvailabilityZones calls to list
// zones; it defaults to the EC2 client's AvailabilityZones method.
// NOTE(review): presumably a variable so that tests can replace it — confirm.
var ec2AvailabilityZones = (*ec2.EC2).AvailabilityZones
   203  
// ec2AvailabilityZone wraps ec2.AvailabilityZoneInfo and adds the Name and
// Available methods; values of this type are stored in the environ's
// []common.AvailabilityZone cache.
type ec2AvailabilityZone struct {
	ec2.AvailabilityZoneInfo
}

// Name returns the zone name recorded in the embedded info.
func (z *ec2AvailabilityZone) Name() string {
	return z.AvailabilityZoneInfo.Name
}

// Available reports whether the zone's state equals availableState.
func (z *ec2AvailabilityZone) Available() bool {
	return z.AvailabilityZoneInfo.State == availableState
}
   215  
   216  // AvailabilityZones returns a slice of availability zones
   217  // for the configured region.
   218  func (e *environ) AvailabilityZones(ctx context.ProviderCallContext) ([]common.AvailabilityZone, error) {
   219  	e.availabilityZonesMutex.Lock()
   220  	defer e.availabilityZonesMutex.Unlock()
   221  	if e.availabilityZones == nil {
   222  		filter := ec2.NewFilter()
   223  		filter.Add("region-name", e.cloud.Region)
   224  		resp, err := ec2AvailabilityZones(e.ec2, filter)
   225  		if err != nil {
   226  			return nil, maybeConvertCredentialError(err, ctx)
   227  		}
   228  		logger.Debugf("availability zones: %+v", resp)
   229  		e.availabilityZones = make([]common.AvailabilityZone, len(resp.Zones))
   230  		for i, z := range resp.Zones {
   231  			e.availabilityZones[i] = &ec2AvailabilityZone{z}
   232  		}
   233  	}
   234  	return e.availabilityZones, nil
   235  }
   236  
   237  // InstanceAvailabilityZoneNames returns the availability zone names for each
   238  // of the specified instances.
   239  func (e *environ) InstanceAvailabilityZoneNames(ctx context.ProviderCallContext, ids []instance.Id) ([]string, error) {
   240  	instances, err := e.Instances(ctx, ids)
   241  	if err != nil && err != environs.ErrPartialInstances {
   242  		return nil, err
   243  	}
   244  	zones := make([]string, len(instances))
   245  	for i, inst := range instances {
   246  		if inst == nil {
   247  			continue
   248  		}
   249  		zones[i] = inst.(*ec2Instance).AvailZone
   250  	}
   251  	return zones, err
   252  }
   253  
   254  // DeriveAvailabilityZones is part of the common.ZonedEnviron interface.
   255  func (e *environ) DeriveAvailabilityZones(ctx context.ProviderCallContext, args environs.StartInstanceParams) ([]string, error) {
   256  	availabilityZone, err := e.deriveAvailabilityZone(ctx, args)
   257  	if availabilityZone != "" {
   258  		return []string{availabilityZone}, errors.Trace(err)
   259  	}
   260  	return nil, errors.Trace(err)
   261  }
   262  
// ec2Placement is the parsed form of a placement directive, as produced by
// parsePlacement.
type ec2Placement struct {
	// availabilityZone is set for both "zone=" and "subnet=" directives.
	availabilityZone *ec2.AvailabilityZoneInfo
	// subnet is only set for "subnet=" directives; it is nil otherwise.
	subnet *ec2.Subnet
}
   267  
   268  func (e *environ) parsePlacement(ctx context.ProviderCallContext, placement string) (*ec2Placement, error) {
   269  	pos := strings.IndexRune(placement, '=')
   270  	if pos == -1 {
   271  		return nil, fmt.Errorf("unknown placement directive: %v", placement)
   272  	}
   273  	switch key, value := placement[:pos], placement[pos+1:]; key {
   274  	case "zone":
   275  		availabilityZone := value
   276  		zones, err := e.AvailabilityZones(ctx)
   277  		if err != nil {
   278  			return nil, err
   279  		}
   280  		for _, z := range zones {
   281  			if z.Name() == availabilityZone {
   282  				ec2AZ := z.(*ec2AvailabilityZone)
   283  				return &ec2Placement{
   284  					availabilityZone: &ec2AZ.AvailabilityZoneInfo,
   285  				}, nil
   286  			}
   287  		}
   288  		return nil, fmt.Errorf("invalid availability zone %q", availabilityZone)
   289  	case "subnet":
   290  		logger.Debugf("searching for subnet matching placement directive %q", value)
   291  		matcher := CreateSubnetMatcher(value)
   292  		// Get all known subnets, look for a match
   293  		allSubnets := []string{}
   294  		subnetResp, vpcId, err := e.subnetsForVPC(ctx)
   295  		if err != nil {
   296  			return nil, errors.Trace(err)
   297  		}
   298  		// we'll also need info about this zone, we don't have a way right now to ask about a single AZ, so punt
   299  		zones, err := e.AvailabilityZones(ctx)
   300  		if err != nil {
   301  			return nil, errors.Trace(err)
   302  		}
   303  		for _, subnet := range subnetResp.Subnets {
   304  			allSubnets = append(allSubnets, fmt.Sprintf("%q:%q", subnet.Id, subnet.CIDRBlock))
   305  			if matcher.Match(subnet) {
   306  				// We found the CIDR, now see if we can find the AZs.
   307  				for _, zone := range zones {
   308  					if zone.Name() == subnet.AvailZone {
   309  						ec2AZ := zone.(*ec2AvailabilityZone)
   310  						return &ec2Placement{
   311  							availabilityZone: &ec2AZ.AvailabilityZoneInfo,
   312  							subnet:           &subnet,
   313  						}, nil
   314  					}
   315  				}
   316  				logger.Debugf("found a matching subnet (%v) but couldn't find the AZ", subnet)
   317  			}
   318  		}
   319  		logger.Debugf("searched for subnet %q, did not find it in all subnets %v for vpc-id %q", value, allSubnets, vpcId)
   320  	}
   321  	return nil, fmt.Errorf("unknown placement directive: %v", placement)
   322  }
   323  
   324  // PrecheckInstance is defined on the environs.InstancePrechecker interface.
   325  func (e *environ) PrecheckInstance(ctx context.ProviderCallContext, args environs.PrecheckInstanceParams) error {
   326  	if _, _, err := e.deriveAvailabilityZoneAndSubnetID(ctx,
   327  		environs.StartInstanceParams{
   328  			Placement:         args.Placement,
   329  			VolumeAttachments: args.VolumeAttachments,
   330  		},
   331  	); err != nil {
   332  		return errors.Trace(err)
   333  	}
   334  	if !args.Constraints.HasInstanceType() {
   335  		return nil
   336  	}
   337  	// Constraint has an instance-type constraint so let's see if it is valid.
   338  	instanceTypes, err := e.supportedInstanceTypes(ctx)
   339  	if err != nil {
   340  		return errors.Trace(err)
   341  	}
   342  	for _, itype := range instanceTypes {
   343  		if itype.Name != *args.Constraints.InstanceType {
   344  			continue
   345  		}
   346  		if archMatches(itype.Arches, args.Constraints.Arch) {
   347  			return nil
   348  		}
   349  	}
   350  	if args.Constraints.Arch == nil {
   351  		return fmt.Errorf("invalid AWS instance type %q specified", *args.Constraints.InstanceType)
   352  	}
   353  	return fmt.Errorf("invalid AWS instance type %q and arch %q specified", *args.Constraints.InstanceType, *args.Constraints.Arch)
   354  }
   355  
   356  // MetadataLookupParams returns parameters which are used to query simplestreams metadata.
   357  func (e *environ) MetadataLookupParams(region string) (*simplestreams.MetadataLookupParams, error) {
   358  	var endpoint string
   359  	if region == "" {
   360  		region = e.cloud.Region
   361  		endpoint = e.cloud.Endpoint
   362  	} else {
   363  		// TODO(axw) 2016-10-04 #1630089
   364  		// MetadataLookupParams needs to be updated so that providers
   365  		// are not expected to know how to map regions to endpoints.
   366  		ec2Region, ok := aws.Regions[region]
   367  		if !ok {
   368  			return nil, errors.Errorf("unknown region %q", region)
   369  		}
   370  		endpoint = ec2Region.EC2Endpoint
   371  	}
   372  	return &simplestreams.MetadataLookupParams{
   373  		Series:   config.PreferredSeries(e.ecfg()),
   374  		Region:   region,
   375  		Endpoint: endpoint,
   376  	}, nil
   377  }
   378  
   379  // Region is specified in the HasRegion interface.
   380  func (e *environ) Region() (simplestreams.CloudSpec, error) {
   381  	return simplestreams.CloudSpec{
   382  		Region:   e.cloud.Region,
   383  		Endpoint: e.cloud.Endpoint,
   384  	}, nil
   385  }
   386  
// Storage type names passed in the Storage field of the instance
// constraint built by StartInstance.
const (
	ebsStorage = "ebs"
	ssdStorage = "ssd"
)
   391  
// DistributeInstances implements the state.InstanceDistributor policy.
// The work is delegated to common.DistributeInstances.
func (e *environ) DistributeInstances(
	ctx context.ProviderCallContext, candidates, distributionGroup []instance.Id, limitZones []string,
) ([]instance.Id, error) {
	return common.DistributeInstances(e, ctx, candidates, distributionGroup, limitZones)
}
   398  
// MaintainInstance is specified in the InstanceBroker interface.
// It does nothing for this provider and always returns nil.
func (*environ) MaintainInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) error {
	return nil
}
   403  
// resourceName returns the string to use for a resource's Name tag,
// to help users identify Juju-managed resources in the AWS console.
// The result has the form "juju-<envName>-<tag>".
func resourceName(tag names.Tag, envName string) string {
	return fmt.Sprintf("juju-%s-%s", envName, tag)
}
   409  
// StartInstance is specified in the InstanceBroker interface.
//
// In order, it:
//  1. derives the availability zone (and optional placement subnet);
//  2. selects an instance spec and matching tools, and finishes the
//     instance configuration;
//  3. composes the user data and sets up the security groups;
//  4. chooses a subnet in the zone, when a VPC or space constraints apply;
//  5. runs exactly one instance;
//  6. tags the instance and, if it has one, its root EBS volume.
//
// If an error is returned after the instance has been created, the deferred
// function stops that instance again. Most errors are wrapped with
// common.ZoneIndependentError; errors recognised as invalid-credential
// errors, invalid-zone errors and zone/subnet-constrained errors are
// returned without that wrapping.
func (e *environ) StartInstance(ctx context.ProviderCallContext, args environs.StartInstanceParams) (_ *environs.StartInstanceResult, resultErr error) {
	var inst *ec2Instance
	callback := args.StatusCallback
	// Clean up: if we fail after the instance exists, stop it so that it is
	// not left running.
	defer func() {
		if resultErr == nil || inst == nil {
			return
		}
		if err := e.StopInstances(ctx, inst.Id()); err != nil {
			callback(status.Error, fmt.Sprintf("error stopping failed instance: %v", err), nil)
			logger.Errorf("error stopping failed instance: %v", err)
		}
	}()

	callback(status.Allocating, "Verifying availability zone", nil)

	// annotateWrapError converts credential errors, annotates the result,
	// and marks everything that is not a credential error as independent of
	// the availability zone.
	annotateWrapError := func(received error, annotation string) error {
		if received == nil {
			return nil
		}
		// If there is a problem with authentication/authorisation,
		// we want a correctly typed error.
		annotatedErr := errors.Annotate(
			maybeConvertCredentialError(received, ctx),
			annotation)
		if common.IsCredentialNotValid(annotatedErr) {
			return annotatedErr
		}
		return common.ZoneIndependentError(annotatedErr)
	}

	// wrapError is annotateWrapError with an empty annotation.
	wrapError := func(received error) error {
		return annotateWrapError(received, "")
	}

	// Verify the provided availability zone to start the instance in.  It's
	// provided via StartInstanceParams Constraints or AvailabilityZone.
	// The availability zone of existing volumes that are to be
	// attached to the machine must all match, and must be the same
	// as specified zone (if any).
	availabilityZone, placementSubnetID, err := e.deriveAvailabilityZoneAndSubnetID(ctx, args)
	if err != nil {
		// An IsNotValid error is returned if the zone is invalid;
		// this is a zone-specific error.
		zoneSpecific := errors.IsNotValid(err)
		if !zoneSpecific {
			return nil, wrapError(err)
		}
		return nil, err
	}

	arches := args.Tools.Arches()

	instanceTypes, err := e.supportedInstanceTypes(ctx)
	if err != nil {
		return nil, wrapError(err)
	}

	// Pick the image and instance type satisfying the constraints.
	spec, err := findInstanceSpec(
		args.InstanceConfig.Controller != nil,
		args.ImageMetadata,
		instanceTypes,
		&instances.InstanceConstraint{
			Region:      e.cloud.Region,
			Series:      args.InstanceConfig.Series,
			Arches:      arches,
			Constraints: args.Constraints,
			Storage:     []string{ssdStorage, ebsStorage},
		},
	)
	if err != nil {
		return nil, wrapError(err)
	}
	// Narrow the tools down to the architecture of the chosen image.
	tools, err := args.Tools.Match(tools.Filter{Arch: spec.Image.Arch})
	if err != nil {
		return nil, common.ZoneIndependentError(
			errors.Errorf("chosen architecture %v not present in %v", spec.Image.Arch, arches),
		)
	}

	if spec.InstanceType.Deprecated {
		logger.Infof("deprecated instance type specified: %s", spec.InstanceType.Name)
	}

	if err := args.InstanceConfig.SetTools(tools); err != nil {
		return nil, common.ZoneIndependentError(err)
	}
	if err := instancecfg.FinishInstanceConfig(args.InstanceConfig, e.Config()); err != nil {
		return nil, common.ZoneIndependentError(err)
	}

	callback(status.Allocating, "Making user data", nil)
	userData, err := providerinit.ComposeUserData(args.InstanceConfig, nil, AmazonRenderer{})
	if err != nil {
		return nil, common.ZoneIndependentError(
			errors.Annotate(err, "cannot make user data"),
		)
	}
	logger.Debugf("ec2 user data; %d bytes", len(userData))
	// Controllers expose their configured API port (plus port 80 when an
	// autocert DNS name is set); other machines use the first port found in
	// their API info.
	apiPorts := make([]int, 0, 2)
	if args.InstanceConfig.Controller != nil {
		apiPorts = append(apiPorts, args.InstanceConfig.Controller.Config.APIPort())
		if args.InstanceConfig.Controller.Config.AutocertDNSName() != "" {
			// Open port 80 as well as it handles Let's Encrypt HTTP challenge.
			apiPorts = append(apiPorts, 80)
		}
	} else {
		apiPorts = append(apiPorts, args.InstanceConfig.APIInfo.Ports()[0])
	}
	callback(status.Allocating, "Setting up groups", nil)
	groups, err := e.setUpGroups(ctx, args.ControllerUUID, args.InstanceConfig.MachineId, apiPorts)
	if err != nil {
		return nil, annotateWrapError(err, "cannot set up groups")
	}

	blockDeviceMappings := getBlockDeviceMappings(
		args.Constraints,
		args.InstanceConfig.Series,
		args.InstanceConfig.Controller != nil,
	)
	// The size of the first mapping, multiplied by 1024, is what gets
	// reported as the root disk size in the hardware characteristics.
	// NOTE(review): presumably a GiB to MiB conversion — confirm units.
	rootDiskSize := uint64(blockDeviceMappings[0].VolumeSize) * 1024

	// If --constraints spaces=foo was passed, the provisioner will populate
	// the args.SubnetsToZones map. The subnet selection further down uses
	// that map (or the placement subnet) to pass an explicit subnet ID to
	// RunInstances rather than just an AZ.
	//
	// Only a notice is logged here when more than one positive space is
	// given.
	//
	// TODO(dimitern): This should be done in a provider-independent way.
	if spaces := args.Constraints.IncludeSpaces(); len(spaces) > 1 {
		logger.Infof("ignoring all but the first positive space from constraints: %v", spaces)
	}

	var instResp *ec2.RunInstancesResp
	commonRunArgs := &ec2.RunInstances{
		MinCount:            1,
		MaxCount:            1,
		UserData:            userData,
		InstanceType:        spec.InstanceType.Name,
		SecurityGroups:      groups,
		BlockDeviceMappings: blockDeviceMappings,
		ImageId:             spec.Image.Id,
	}

	// runArgs points at the same struct as commonRunArgs; the zone and
	// subnet are filled in on it below.
	runArgs := commonRunArgs
	runArgs.AvailZone = availabilityZone

	// Work out the candidate subnets in the chosen zone. With a VPC
	// configured, the candidates are limited to the placement subnet or to
	// the subnets in args.SubnetsToZones; without one, subnets are only
	// considered when space constraints are present.
	haveVPCID := isVPCIDSet(e.ecfg().vpcID())
	var subnetIDsForZone []string
	var subnetErr error
	if haveVPCID {
		var allowedSubnetIDs []string
		if placementSubnetID != "" {
			allowedSubnetIDs = []string{placementSubnetID}
		} else {
			for subnetID := range args.SubnetsToZones {
				allowedSubnetIDs = append(allowedSubnetIDs, string(subnetID))
			}
		}
		subnetIDsForZone, subnetErr = getVPCSubnetIDsForAvailabilityZone(e.ec2, ctx, e.ecfg().vpcID(), availabilityZone, allowedSubnetIDs)
	} else if args.Constraints.HasSpaces() {
		subnetIDsForZone, subnetErr = findSubnetIDsForAvailabilityZone(availabilityZone, args.SubnetsToZones)
		if subnetErr == nil && placementSubnetID != "" {
			// The placement subnet must be one of the zone's subnets.
			asSet := set.NewStrings(subnetIDsForZone...)
			if asSet.Contains(placementSubnetID) {
				subnetIDsForZone = []string{placementSubnetID}
			} else {
				subnetIDsForZone = nil
				subnetErr = errors.NotFoundf("subnets %q in AZ %q", placementSubnetID, availabilityZone)
			}
		}
	}

	switch {
	case subnetErr != nil && errors.IsNotFound(subnetErr):
		return nil, errors.Trace(subnetErr)
	case subnetErr != nil:
		return nil, errors.Annotatef(maybeConvertCredentialError(subnetErr, ctx), "getting subnets for zone %q", availabilityZone)
	case len(subnetIDsForZone) > 1:
		// With multiple equally suitable subnets, picking one at random
		// will allow for better instance spread within the same zone, and
		// still work correctly if we happen to pick a constrained subnet
		// (we'll just treat this the same way we treat constrained zones
		// and retry).
		runArgs.SubnetId = subnetIDsForZone[rand.Intn(len(subnetIDsForZone))]
		logger.Debugf("selected random subnet %q from all matching in zone %q", runArgs.SubnetId, availabilityZone)
	case len(subnetIDsForZone) == 1:
		runArgs.SubnetId = subnetIDsForZone[0]
		logger.Debugf("selected subnet %q in zone %q", runArgs.SubnetId, availabilityZone)
	}

	callback(status.Allocating, fmt.Sprintf("Trying to start instance in availability zone %q", availabilityZone), nil)
	instResp, err = runInstances(e.ec2, ctx, runArgs, callback)
	if err != nil {
		// Zone/subnet constrained errors are returned as they are; all
		// other errors go through annotateWrapError.
		if !isZoneOrSubnetConstrainedError(err) {
			err = annotateWrapError(err, "cannot run instances")
		}
		return nil, err
	}
	if len(instResp.Instances) != 1 {
		return nil, errors.Errorf("expected 1 started instance, got %d", len(instResp.Instances))
	}

	// From here on the deferred function will stop the instance if we
	// return an error.
	inst = &ec2Instance{
		e:        e,
		Instance: &instResp.Instances[0],
	}
	instAZ := inst.Instance.AvailZone
	if haveVPCID {
		instVPC := e.ecfg().vpcID()
		instSubnet := inst.Instance.SubnetId
		logger.Infof("started instance %q in AZ %q, subnet %q, VPC %q", inst.Id(), instAZ, instSubnet, instVPC)
	} else {
		logger.Infof("started instance %q in AZ %q", inst.Id(), instAZ)
	}

	// Tag instance, for accounting and identification.
	instanceName := resourceName(
		names.NewMachineTag(args.InstanceConfig.MachineId), e.Config().Name(),
	)
	args.InstanceConfig.Tags[tagName] = instanceName
	if err := tagResources(e.ec2, ctx, args.InstanceConfig.Tags, string(inst.Id())); err != nil {
		return nil, annotateWrapError(err, "tagging instance")
	}

	// Tag the machine's root EBS volume, if it has one.
	if inst.Instance.RootDeviceType == "ebs" {
		cfg := e.Config()
		tags := tags.ResourceTags(
			names.NewModelTag(cfg.UUID()),
			names.NewControllerTag(args.ControllerUUID),
			cfg,
		)
		tags[tagName] = instanceName + "-root"
		if err := tagRootDisk(e.ec2, ctx, tags, inst.Instance); err != nil {
			return nil, annotateWrapError(err, "tagging root disk")
		}
	}

	// Report the hardware of the selected spec together with the zone the
	// instance actually landed in.
	hc := instance.HardwareCharacteristics{
		Arch:     &spec.Image.Arch,
		Mem:      &spec.InstanceType.Mem,
		CpuCores: &spec.InstanceType.CpuCores,
		CpuPower: spec.InstanceType.CpuPower,
		RootDisk: &rootDiskSize,
		// Tags currently not supported by EC2
		AvailabilityZone: &inst.Instance.AvailZone,
	}
	return &environs.StartInstanceResult{
		Instance: inst,
		Hardware: &hc,
	}, nil
}
   665  
   666  func (e *environ) deriveAvailabilityZone(ctx context.ProviderCallContext, args environs.StartInstanceParams) (string, error) {
   667  	availabilityZone, _, err := e.deriveAvailabilityZoneAndSubnetID(ctx, args)
   668  	return availabilityZone, err
   669  }
   670  
   671  func (e *environ) deriveAvailabilityZoneAndSubnetID(ctx context.ProviderCallContext, args environs.StartInstanceParams) (string, string, error) {
   672  	// Determine the availability zones of existing volumes that are to be
   673  	// attached to the machine. They must all match, and must be the same
   674  	// as specified zone (if any).
   675  	volumeAttachmentsZone, err := volumeAttachmentsZone(e.ec2, ctx, args.VolumeAttachments)
   676  	if err != nil {
   677  		return "", "", errors.Trace(err)
   678  	}
   679  	placementZone, placementSubnetID, err := e.instancePlacementZone(ctx, args.Placement, volumeAttachmentsZone)
   680  	if err != nil {
   681  		return "", "", errors.Trace(err)
   682  	}
   683  	var availabilityZone string
   684  	if placementZone != "" {
   685  		availabilityZone = placementZone
   686  	} else if args.AvailabilityZone != "" {
   687  		// Validate and check state of the AvailabilityZone
   688  		zones, err := e.AvailabilityZones(ctx)
   689  		if err != nil {
   690  			return "", "", err
   691  		}
   692  		for _, z := range zones {
   693  			if z.Name() == args.AvailabilityZone {
   694  				ec2AZ := z.(*ec2AvailabilityZone)
   695  				if ec2AZ.AvailabilityZoneInfo.State != availableState {
   696  					return "", "", errors.Errorf(
   697  						"availability zone %q is %q",
   698  						ec2AZ.AvailabilityZoneInfo.Name,
   699  						ec2AZ.AvailabilityZoneInfo.State,
   700  					)
   701  				} else {
   702  					availabilityZone = args.AvailabilityZone
   703  				}
   704  				break
   705  			}
   706  		}
   707  		if availabilityZone == "" {
   708  			return "", "", errors.NotValidf("availability zone %q", availabilityZone)
   709  		}
   710  	}
   711  	return availabilityZone, placementSubnetID, nil
   712  }
   713  
   714  func (e *environ) instancePlacementZone(ctx context.ProviderCallContext, placement, volumeAttachmentsZone string) (zone, subnet string, _ error) {
   715  	if placement == "" {
   716  		return volumeAttachmentsZone, "", nil
   717  	}
   718  	var placementSubnetID string
   719  	instPlacement, err := e.parsePlacement(ctx, placement)
   720  	if err != nil {
   721  		return "", "", errors.Trace(err)
   722  	}
   723  	if instPlacement.availabilityZone.State != availableState {
   724  		return "", "", errors.Errorf(
   725  			"availability zone %q is %q",
   726  			instPlacement.availabilityZone.Name,
   727  			instPlacement.availabilityZone.State,
   728  		)
   729  	}
   730  	if volumeAttachmentsZone != "" && volumeAttachmentsZone != instPlacement.availabilityZone.Name {
   731  		return "", "", errors.Errorf(
   732  			"cannot create instance with placement %q, as this will prevent attaching the requested EBS volumes in zone %q",
   733  			placement, volumeAttachmentsZone,
   734  		)
   735  	}
   736  	if instPlacement.subnet != nil {
   737  		if instPlacement.subnet.State != availableState {
   738  			return "", "", errors.Errorf("subnet %q is %q", instPlacement.subnet.CIDRBlock, instPlacement.subnet.State)
   739  		}
   740  		placementSubnetID = instPlacement.subnet.Id
   741  	}
   742  	return instPlacement.availabilityZone.Name, placementSubnetID, nil
   743  }
   744  
   745  // volumeAttachmentsZone determines the availability zone for each volume
   746  // identified in the volume attachment parameters, checking that they are
   747  // all the same, and returns the availability zone name.
   748  func volumeAttachmentsZone(ec2 *ec2.EC2, ctx context.ProviderCallContext, attachments []storage.VolumeAttachmentParams) (string, error) {
   749  	volumeIds := make([]string, 0, len(attachments))
   750  	for _, a := range attachments {
   751  		if a.Provider != EBS_ProviderType {
   752  			continue
   753  		}
   754  		volumeIds = append(volumeIds, a.VolumeId)
   755  	}
   756  	if len(volumeIds) == 0 {
   757  		return "", nil
   758  	}
   759  	resp, err := ec2.Volumes(volumeIds, nil)
   760  	if err != nil {
   761  		return "", errors.Annotatef(maybeConvertCredentialError(err, ctx), "getting volume details (%s)", volumeIds)
   762  	}
   763  	if len(resp.Volumes) == 0 {
   764  		return "", nil
   765  	}
   766  	for i, v := range resp.Volumes[1:] {
   767  		if v.AvailZone != resp.Volumes[i].AvailZone {
   768  			return "", errors.Errorf(
   769  				"cannot attach volumes from multiple availability zones: %s is in %s, %s is in %s",
   770  				resp.Volumes[i].Id, resp.Volumes[i].AvailZone, v.Id, v.AvailZone,
   771  			)
   772  		}
   773  	}
   774  	return resp.Volumes[0].AvailZone, nil
   775  }
   776  
   777  // tagResources calls ec2.CreateTags, tagging each of the specified resources
   778  // with the given tags. tagResources will retry for a short period of time
   779  // if it receives a *.NotFound error response from EC2.
   780  func tagResources(e *ec2.EC2, ctx context.ProviderCallContext, tags map[string]string, resourceIds ...string) error {
   781  	if len(tags) == 0 {
   782  		return nil
   783  	}
   784  	ec2Tags := make([]ec2.Tag, 0, len(tags))
   785  	for k, v := range tags {
   786  		ec2Tags = append(ec2Tags, ec2.Tag{k, v})
   787  	}
   788  	var err error
   789  	for a := shortAttempt.Start(); a.Next(); {
   790  		_, err = e.CreateTags(resourceIds, ec2Tags)
   791  		if err == nil || !strings.HasSuffix(ec2ErrCode(err), ".NotFound") {
   792  			return err
   793  		}
   794  	}
   795  	return maybeConvertCredentialError(err, ctx)
   796  }
   797  
   798  func tagRootDisk(e *ec2.EC2, ctx context.ProviderCallContext, tags map[string]string, inst *ec2.Instance) error {
   799  	if len(tags) == 0 {
   800  		return nil
   801  	}
   802  	findVolumeId := func(inst *ec2.Instance) string {
   803  		for _, m := range inst.BlockDeviceMappings {
   804  			if m.DeviceName != inst.RootDeviceName {
   805  				continue
   806  			}
   807  			return m.VolumeId
   808  		}
   809  		return ""
   810  	}
   811  	// Wait until the instance has an associated EBS volume in the
   812  	// block-device-mapping.
   813  	volumeId := findVolumeId(inst)
   814  	// TODO(katco): 2016-08-09: lp:1611427
   815  	waitRootDiskAttempt := utils.AttemptStrategy{
   816  		Total: 5 * time.Minute,
   817  		Delay: 5 * time.Second,
   818  	}
   819  	for a := waitRootDiskAttempt.Start(); volumeId == "" && a.Next(); {
   820  		resp, err := e.Instances([]string{inst.InstanceId}, nil)
   821  		if err != nil {
   822  			err = errors.Annotate(maybeConvertCredentialError(err, ctx), "cannot fetch instance information")
   823  			logger.Warningf("%v", err)
   824  			if a.HasNext() == false {
   825  				return err
   826  			}
   827  			logger.Infof("retrying fetch of instances")
   828  			continue
   829  		}
   830  		if len(resp.Reservations) > 0 && len(resp.Reservations[0].Instances) > 0 {
   831  			inst = &resp.Reservations[0].Instances[0]
   832  			volumeId = findVolumeId(inst)
   833  		}
   834  	}
   835  	if volumeId == "" {
   836  		return errors.New("timed out waiting for EBS volume to be associated")
   837  	}
   838  	return tagResources(e, ctx, tags, volumeId)
   839  }
   840  
   841  var runInstances = _runInstances
   842  
   843  // runInstances calls ec2.RunInstances for a fixed number of attempts until
   844  // RunInstances returns an error code that does not indicate an error that
   845  // may be caused by eventual consistency.
   846  func _runInstances(e *ec2.EC2, ctx context.ProviderCallContext, ri *ec2.RunInstances, c environs.StatusCallbackFunc) (resp *ec2.RunInstancesResp, err error) {
   847  	try := 1
   848  	for a := shortAttempt.Start(); a.Next(); {
   849  		c(status.Allocating, fmt.Sprintf("Start instance attempt %d", try), nil)
   850  		resp, err = e.RunInstances(ri)
   851  		if err == nil || !isNotFoundError(err) {
   852  			break
   853  		}
   854  		try++
   855  	}
   856  	return resp, maybeConvertCredentialError(err, ctx)
   857  }
   858  
   859  func (e *environ) StopInstances(ctx context.ProviderCallContext, ids ...instance.Id) error {
   860  	return errors.Trace(e.terminateInstances(ctx, ids))
   861  }
   862  
   863  // groupInfoByName returns information on the security group
   864  // with the given name including rules and other details.
   865  func (e *environ) groupInfoByName(ctx context.ProviderCallContext, groupName string) (ec2.SecurityGroupInfo, error) {
   866  	resp, err := e.securityGroupsByNameOrID(groupName)
   867  	if err != nil {
   868  		return ec2.SecurityGroupInfo{}, maybeConvertCredentialError(err, ctx)
   869  	}
   870  
   871  	if len(resp.Groups) != 1 {
   872  		return ec2.SecurityGroupInfo{}, errors.NewNotFound(fmt.Errorf(
   873  			"expected one security group named %q, got %v",
   874  			groupName, resp.Groups,
   875  		), "")
   876  	}
   877  	return resp.Groups[0], nil
   878  }
   879  
   880  // groupByName returns the security group with the given name.
   881  func (e *environ) groupByName(ctx context.ProviderCallContext, groupName string) (ec2.SecurityGroup, error) {
   882  	groupInfo, err := e.groupInfoByName(ctx, groupName)
   883  	return groupInfo.SecurityGroup, err
   884  }
   885  
   886  // isNotFoundError returns whether err is a typed NotFoundError or an EC2 error
   887  // code for "group not found", indicating no matching instances (as they are
   888  // filtered by group).
   889  func isNotFoundError(err error) bool {
   890  	return err != nil && (errors.IsNotFound(err) || ec2ErrCode(err) == "InvalidGroup.NotFound")
   891  }
   892  
   893  // Instances is part of the environs.Environ interface.
   894  func (e *environ) Instances(ctx context.ProviderCallContext, ids []instance.Id) ([]instances.Instance, error) {
   895  	if len(ids) == 0 {
   896  		return nil, nil
   897  	}
   898  	insts := make([]instances.Instance, len(ids))
   899  	// Make a series of requests to cope with eventual consistency.
   900  	// Each request will attempt to add more instances to the requested
   901  	// set.
   902  	var err error
   903  	for a := shortAttempt.Start(); a.Next(); {
   904  		var need []string
   905  		for i, inst := range insts {
   906  			if inst == nil {
   907  				need = append(need, string(ids[i]))
   908  			}
   909  		}
   910  		filter := ec2.NewFilter()
   911  		filter.Add("instance-state-name", aliveInstanceStates...)
   912  		filter.Add("instance-id", need...)
   913  		e.addModelFilter(filter)
   914  		err = e.gatherInstances(ctx, ids, insts, filter)
   915  		if err == nil || err != environs.ErrPartialInstances {
   916  			break
   917  		}
   918  	}
   919  	if err == environs.ErrPartialInstances {
   920  		for _, inst := range insts {
   921  			if inst != nil {
   922  				return insts, environs.ErrPartialInstances
   923  			}
   924  		}
   925  		return nil, environs.ErrNoInstances
   926  	}
   927  	if err != nil {
   928  		return nil, err
   929  	}
   930  	return insts, nil
   931  }
   932  
   933  // gatherInstances tries to get information on each instance
   934  // id whose corresponding insts slot is nil.
   935  //
   936  // This function returns environs.ErrPartialInstances if the
   937  // insts slice has not been completely filled.
   938  func (e *environ) gatherInstances(
   939  	ctx context.ProviderCallContext,
   940  	ids []instance.Id,
   941  	insts []instances.Instance,
   942  	filter *ec2.Filter,
   943  ) error {
   944  	resp, err := e.ec2.Instances(nil, filter)
   945  	if err != nil {
   946  		return maybeConvertCredentialError(err, ctx)
   947  	}
   948  	n := 0
   949  	// For each requested id, add it to the returned instances
   950  	// if we find it in the response.
   951  	for i, id := range ids {
   952  		if insts[i] != nil {
   953  			n++
   954  			continue
   955  		}
   956  		for j := range resp.Reservations {
   957  			r := &resp.Reservations[j]
   958  			for k := range r.Instances {
   959  				if r.Instances[k].InstanceId != string(id) {
   960  					continue
   961  				}
   962  				inst := r.Instances[k]
   963  				// TODO(wallyworld): lookup the details to fill in the instance type data
   964  				insts[i] = &ec2Instance{e: e, Instance: &inst}
   965  				n++
   966  			}
   967  		}
   968  	}
   969  	if n < len(ids) {
   970  		return environs.ErrPartialInstances
   971  	}
   972  	return nil
   973  }
   974  
   975  // NetworkInterfaces implements NetworkingEnviron.NetworkInterfaces.
   976  func (e *environ) NetworkInterfaces(ctx context.ProviderCallContext, instId instance.Id) ([]network.InterfaceInfo, error) {
   977  	var err error
   978  	var networkInterfacesResp *ec2.NetworkInterfacesResp
   979  	for a := shortAttempt.Start(); a.Next(); {
   980  		logger.Tracef("retrieving NICs for instance %q", instId)
   981  		filter := ec2.NewFilter()
   982  		filter.Add("attachment.instance-id", string(instId))
   983  		networkInterfacesResp, err = e.ec2.NetworkInterfaces(nil, filter)
   984  		logger.Tracef("instance %q NICs: %#v (err: %v)", instId, networkInterfacesResp, err)
   985  		if err != nil {
   986  			err = maybeConvertCredentialError(err, ctx)
   987  			if common.IsCredentialNotValid(err) {
   988  				// no need to re-try: there is a problem with credentials
   989  				break
   990  			}
   991  			logger.Errorf("failed to get instance %q interfaces: %v (retrying)", instId, err)
   992  			continue
   993  		}
   994  		if len(networkInterfacesResp.Interfaces) == 0 {
   995  			logger.Tracef("instance %q has no NIC attachment yet, retrying...", instId)
   996  			continue
   997  		}
   998  		logger.Tracef("found instance %q NICS: %#v", instId, networkInterfacesResp.Interfaces)
   999  		break
  1000  	}
  1001  	if err != nil {
  1002  		// either the instance doesn't exist or we couldn't get through to
  1003  		// the ec2 api
  1004  		return nil, errors.Annotatef(err, "cannot get instance %q network interfaces", instId)
  1005  	}
  1006  	ec2Interfaces := networkInterfacesResp.Interfaces
  1007  	result := make([]network.InterfaceInfo, len(ec2Interfaces))
  1008  	for i, iface := range ec2Interfaces {
  1009  		resp, err := e.ec2.Subnets([]string{iface.SubnetId}, nil)
  1010  		if err != nil {
  1011  			return nil, errors.Annotatef(maybeConvertCredentialError(err, ctx), "failed to retrieve subnet %q info", iface.SubnetId)
  1012  		}
  1013  		if len(resp.Subnets) != 1 {
  1014  			return nil, errors.Errorf("expected 1 subnet, got %d", len(resp.Subnets))
  1015  		}
  1016  		subnet := resp.Subnets[0]
  1017  		cidr := subnet.CIDRBlock
  1018  
  1019  		result[i] = network.InterfaceInfo{
  1020  			DeviceIndex:       iface.Attachment.DeviceIndex,
  1021  			MACAddress:        iface.MACAddress,
  1022  			CIDR:              cidr,
  1023  			ProviderId:        network.Id(iface.Id),
  1024  			ProviderSubnetId:  network.Id(iface.SubnetId),
  1025  			AvailabilityZones: []string{subnet.AvailZone},
  1026  			VLANTag:           0, // Not supported on EC2.
  1027  			// Getting the interface name is not supported on EC2, so fake it.
  1028  			InterfaceName: fmt.Sprintf("unsupported%d", iface.Attachment.DeviceIndex),
  1029  			Disabled:      false,
  1030  			NoAutoStart:   false,
  1031  			ConfigType:    network.ConfigDHCP,
  1032  			InterfaceType: network.EthernetInterface,
  1033  			Address:       network.NewScopedAddress(iface.PrivateIPAddress, network.ScopeCloudLocal),
  1034  		}
  1035  	}
  1036  	return result, nil
  1037  }
  1038  
  1039  func makeSubnetInfo(cidr string, subnetId, providerNetworkId network.Id, availZones []string) (network.SubnetInfo, error) {
  1040  	_, _, err := net.ParseCIDR(cidr)
  1041  	if err != nil {
  1042  		return network.SubnetInfo{}, errors.Annotatef(err, "skipping subnet %q, invalid CIDR", cidr)
  1043  	}
  1044  
  1045  	info := network.SubnetInfo{
  1046  		CIDR:              cidr,
  1047  		ProviderId:        subnetId,
  1048  		ProviderNetworkId: providerNetworkId,
  1049  		VLANTag:           0, // Not supported on EC2
  1050  		AvailabilityZones: availZones,
  1051  	}
  1052  	logger.Tracef("found subnet with info %#v", info)
  1053  	return info, nil
  1054  
  1055  }
  1056  
  1057  // Spaces is not implemented by the ec2 provider as we don't currently have
  1058  // provider level spaces.
  1059  func (e *environ) Spaces(ctx context.ProviderCallContext) ([]network.SpaceInfo, error) {
  1060  	return nil, errors.NotSupportedf("Spaces")
  1061  }
  1062  
  1063  // Subnets returns basic information about the specified subnets known
  1064  // by the provider for the specified instance or list of ids. subnetIds can be
  1065  // empty, in which case all known are returned. Implements
  1066  // NetworkingEnviron.Subnets.
  1067  func (e *environ) Subnets(ctx context.ProviderCallContext, instId instance.Id, subnetIds []network.Id) ([]network.SubnetInfo, error) {
  1068  	var results []network.SubnetInfo
  1069  	subIdSet := make(map[string]bool)
  1070  	for _, subId := range subnetIds {
  1071  		subIdSet[string(subId)] = false
  1072  	}
  1073  
  1074  	if instId != instance.UnknownId {
  1075  		interfaces, err := e.NetworkInterfaces(ctx, instId)
  1076  		if err != nil {
  1077  			return results, errors.Trace(err)
  1078  		}
  1079  		if len(subnetIds) == 0 {
  1080  			for _, iface := range interfaces {
  1081  				subIdSet[string(iface.ProviderSubnetId)] = false
  1082  			}
  1083  		}
  1084  		for _, iface := range interfaces {
  1085  			_, ok := subIdSet[string(iface.ProviderSubnetId)]
  1086  			if !ok {
  1087  				logger.Tracef("subnet %q not in %v, skipping", iface.ProviderSubnetId, subnetIds)
  1088  				continue
  1089  			}
  1090  			subIdSet[string(iface.ProviderSubnetId)] = true
  1091  			info, err := makeSubnetInfo(iface.CIDR, iface.ProviderSubnetId, iface.ProviderNetworkId, iface.AvailabilityZones)
  1092  			if err != nil {
  1093  				// Error will already have been logged.
  1094  				continue
  1095  			}
  1096  			results = append(results, info)
  1097  		}
  1098  	} else {
  1099  		resp, _, err := e.subnetsForVPC(ctx)
  1100  		if err != nil {
  1101  			return nil, errors.Annotatef(err, "failed to retrieve subnets")
  1102  		}
  1103  		if len(subnetIds) == 0 {
  1104  			for _, subnet := range resp.Subnets {
  1105  				subIdSet[subnet.Id] = false
  1106  			}
  1107  		}
  1108  
  1109  		for _, subnet := range resp.Subnets {
  1110  			_, ok := subIdSet[subnet.Id]
  1111  			if !ok {
  1112  				logger.Tracef("subnet %q not in %v, skipping", subnet.Id, subnetIds)
  1113  				continue
  1114  			}
  1115  			subIdSet[subnet.Id] = true
  1116  			cidr := subnet.CIDRBlock
  1117  			info, err := makeSubnetInfo(cidr, network.Id(subnet.Id), network.Id(subnet.VPCId), []string{subnet.AvailZone})
  1118  			if err != nil {
  1119  				// Error will already have been logged.
  1120  				continue
  1121  			}
  1122  			results = append(results, info)
  1123  
  1124  		}
  1125  	}
  1126  
  1127  	notFound := []string{}
  1128  	for subId, found := range subIdSet {
  1129  		if !found {
  1130  			notFound = append(notFound, subId)
  1131  		}
  1132  	}
  1133  	if len(notFound) != 0 {
  1134  		return nil, errors.Errorf("failed to find the following subnet ids: %v", notFound)
  1135  	}
  1136  
  1137  	return results, nil
  1138  }
  1139  
  1140  func (e *environ) subnetsForVPC(ctx context.ProviderCallContext) (resp *ec2.SubnetsResp, vpcId string, err error) {
  1141  	filter := ec2.NewFilter()
  1142  	vpcId = e.ecfg().vpcID()
  1143  	if !isVPCIDSet(vpcId) {
  1144  		if hasDefaultVPC, err := e.hasDefaultVPC(ctx); err == nil && hasDefaultVPC {
  1145  			vpcId = e.defaultVPC.Id
  1146  		}
  1147  	}
  1148  	filter.Add("vpc-id", vpcId)
  1149  	resp, err = e.ec2.Subnets(nil, filter)
  1150  	return resp, vpcId, maybeConvertCredentialError(err, ctx)
  1151  }
  1152  
  1153  // AdoptResources is part of the Environ interface.
  1154  func (e *environ) AdoptResources(ctx context.ProviderCallContext, controllerUUID string, fromVersion version.Number) error {
  1155  	// Gather resource ids for instances, volumes and security groups tagged with this model.
  1156  	instances, err := e.AllInstances(ctx)
  1157  	if err != nil {
  1158  		return errors.Trace(err)
  1159  	}
  1160  	// We want to update the controller tags on root disks even though
  1161  	// they are destroyed automatically with the instance they're
  1162  	// attached to.
  1163  	volumeIds, err := e.allModelVolumes(ctx, true)
  1164  	if err != nil {
  1165  		return errors.Trace(err)
  1166  	}
  1167  	groupIds, err := e.modelSecurityGroupIDs(ctx)
  1168  	if err != nil {
  1169  		return errors.Trace(err)
  1170  	}
  1171  
  1172  	resourceIds := make([]string, len(instances))
  1173  	for i, instance := range instances {
  1174  		resourceIds[i] = string(instance.Id())
  1175  	}
  1176  	resourceIds = append(resourceIds, volumeIds...)
  1177  	resourceIds = append(resourceIds, groupIds...)
  1178  
  1179  	tags := map[string]string{tags.JujuController: controllerUUID}
  1180  	return errors.Annotate(tagResources(e.ec2, ctx, tags, resourceIds...), "updating tags")
  1181  }
  1182  
  1183  // AllInstances is part of the environs.InstanceBroker interface.
  1184  func (e *environ) AllInstances(ctx context.ProviderCallContext) ([]instances.Instance, error) {
  1185  	return e.AllInstancesByState(ctx, "pending", "running")
  1186  }
  1187  
  1188  // AllInstancesByState returns all instances in the environment
  1189  // with one of the specified instance states.
  1190  func (e *environ) AllInstancesByState(ctx context.ProviderCallContext, states ...string) ([]instances.Instance, error) {
  1191  	// NOTE(axw) we use security group filtering here because instances
  1192  	// start out untagged. If Juju were to abort after starting an instance,
  1193  	// but before tagging it, it would be leaked. We only need to do this
  1194  	// for AllInstances, as it is the result of AllInstances that is used
  1195  	// in "harvesting" unknown instances by the provisioner.
  1196  	//
  1197  	// One possible alternative is to modify ec2.RunInstances to allow the
  1198  	// caller to specify ClientToken, and then format it like
  1199  	//     <controller-uuid>:<model-uuid>:<machine-id>
  1200  	//     (with base64-encoding to keep the size under the 64-byte limit)
  1201  	//
  1202  	// It is possible to filter on "client-token", and specify wildcards;
  1203  	// therefore we could use client-token filters everywhere in the ec2
  1204  	// provider instead of tags or security groups. The only danger is if
  1205  	// we need to make non-idempotent calls to RunInstances for the machine
  1206  	// ID. I don't think this is needed, but I am not confident enough to
  1207  	// change this fundamental right now.
  1208  	//
  1209  	// An EC2 API call is required to resolve the group name to an id, as
  1210  	// VPC enabled accounts do not support name based filtering.
  1211  	groupName := e.jujuGroupName()
  1212  	group, err := e.groupByName(ctx, groupName)
  1213  	if isNotFoundError(err) {
  1214  		// If there's no group, then there cannot be any instances.
  1215  		return nil, nil
  1216  	} else if err != nil {
  1217  		return nil, errors.Trace(maybeConvertCredentialError(err, ctx))
  1218  	}
  1219  	filter := ec2.NewFilter()
  1220  	filter.Add("instance-state-name", states...)
  1221  	filter.Add("instance.group-id", group.Id)
  1222  	return e.allInstances(ctx, filter)
  1223  }
  1224  
  1225  // ControllerInstances is part of the environs.Environ interface.
  1226  func (e *environ) ControllerInstances(ctx context.ProviderCallContext, controllerUUID string) ([]instance.Id, error) {
  1227  	filter := ec2.NewFilter()
  1228  	filter.Add("instance-state-name", aliveInstanceStates...)
  1229  	filter.Add(fmt.Sprintf("tag:%s", tags.JujuIsController), "true")
  1230  	e.addControllerFilter(filter, controllerUUID)
  1231  	ids, err := e.allInstanceIDs(ctx, filter)
  1232  	if err != nil {
  1233  		return nil, errors.Trace(maybeConvertCredentialError(err, ctx))
  1234  	}
  1235  	if len(ids) == 0 {
  1236  		return nil, environs.ErrNotBootstrapped
  1237  	}
  1238  	return ids, nil
  1239  }
  1240  
  1241  // allControllerManagedInstances returns the IDs of all instances managed by
  1242  // this environment's controller.
  1243  //
  1244  // Note that this requires that all instances are tagged; we cannot filter on
  1245  // security groups, as we do not know the names of the models.
  1246  func (e *environ) allControllerManagedInstances(ctx context.ProviderCallContext, controllerUUID string) ([]instance.Id, error) {
  1247  	filter := ec2.NewFilter()
  1248  	filter.Add("instance-state-name", aliveInstanceStates...)
  1249  	e.addControllerFilter(filter, controllerUUID)
  1250  	return e.allInstanceIDs(ctx, filter)
  1251  }
  1252  
  1253  func (e *environ) allInstanceIDs(ctx context.ProviderCallContext, filter *ec2.Filter) ([]instance.Id, error) {
  1254  	insts, err := e.allInstances(ctx, filter)
  1255  	if err != nil {
  1256  		return nil, errors.Trace(maybeConvertCredentialError(err, ctx))
  1257  	}
  1258  	ids := make([]instance.Id, len(insts))
  1259  	for i, inst := range insts {
  1260  		ids[i] = inst.Id()
  1261  	}
  1262  	return ids, nil
  1263  }
  1264  
  1265  func (e *environ) allInstances(ctx context.ProviderCallContext, filter *ec2.Filter) ([]instances.Instance, error) {
  1266  	resp, err := e.ec2.Instances(nil, filter)
  1267  	if err != nil {
  1268  		return nil, errors.Annotate(maybeConvertCredentialError(err, ctx), "listing instances")
  1269  	}
  1270  	var insts []instances.Instance
  1271  	for _, r := range resp.Reservations {
  1272  		for i := range r.Instances {
  1273  			inst := r.Instances[i]
  1274  			// TODO(wallyworld): lookup the details to fill in the instance type data
  1275  			insts = append(insts, &ec2Instance{e: e, Instance: &inst})
  1276  		}
  1277  	}
  1278  	return insts, nil
  1279  }
  1280  
  1281  // Destroy is part of the environs.Environ interface.
  1282  func (e *environ) Destroy(ctx context.ProviderCallContext) error {
  1283  	if err := common.Destroy(e, ctx); err != nil {
  1284  		return errors.Trace(maybeConvertCredentialError(err, ctx))
  1285  	}
  1286  	if err := e.cleanEnvironmentSecurityGroups(ctx); err != nil {
  1287  		return errors.Annotate(maybeConvertCredentialError(err, ctx), "cannot delete environment security groups")
  1288  	}
  1289  	return nil
  1290  }
  1291  
  1292  // DestroyController implements the Environ interface.
  1293  func (e *environ) DestroyController(ctx context.ProviderCallContext, controllerUUID string) error {
  1294  	// In case any hosted environment hasn't been cleaned up yet,
  1295  	// we also attempt to delete their resources when the controller
  1296  	// environment is destroyed.
  1297  	if err := e.destroyControllerManagedEnvirons(ctx, controllerUUID); err != nil {
  1298  		return errors.Annotate(err, "destroying managed environs")
  1299  	}
  1300  	return e.Destroy(ctx)
  1301  }
  1302  
  1303  // destroyControllerManagedEnvirons destroys all environments managed by this
  1304  // environment's controller.
  1305  func (e *environ) destroyControllerManagedEnvirons(ctx context.ProviderCallContext, controllerUUID string) error {
  1306  
  1307  	// Terminate all instances managed by the controller.
  1308  	instIds, err := e.allControllerManagedInstances(ctx, controllerUUID)
  1309  	if err != nil {
  1310  		return errors.Annotate(err, "listing instances")
  1311  	}
  1312  	if err := e.terminateInstances(ctx, instIds); err != nil {
  1313  		return errors.Annotate(err, "terminating instances")
  1314  	}
  1315  
  1316  	// Delete all volumes managed by the controller. (No need to delete root disks manually.)
  1317  	volIds, err := e.allControllerManagedVolumes(ctx, controllerUUID, false)
  1318  	if err != nil {
  1319  		return errors.Annotate(err, "listing volumes")
  1320  	}
  1321  	errs := foreachVolume(e.ec2, ctx, volIds, destroyVolume)
  1322  	for i, err := range errs {
  1323  		if err == nil {
  1324  			continue
  1325  		}
  1326  		// (anastasiamac 2018-03-21) This is strange - we do try
  1327  		// to destroy all volumes but afterwards, if we have encountered any errors,
  1328  		// we will return first one...The same logic happens on detach..?...
  1329  		return errors.Annotatef(err, "destroying volume %q", volIds[i])
  1330  	}
  1331  
  1332  	// Delete security groups managed by the controller.
  1333  	groups, err := e.controllerSecurityGroups(ctx, controllerUUID)
  1334  	if err != nil {
  1335  		return errors.Trace(err)
  1336  	}
  1337  	for _, g := range groups {
  1338  		if err := deleteSecurityGroupInsistently(e.ec2, ctx, g, clock.WallClock); err != nil {
  1339  			return errors.Annotatef(
  1340  				err, "cannot delete security group %q (%q)",
  1341  				g.Name, g.Id,
  1342  			)
  1343  		}
  1344  	}
  1345  	return nil
  1346  }
  1347  
  1348  func (e *environ) allControllerManagedVolumes(ctx context.ProviderCallContext, controllerUUID string, includeRootDisks bool) ([]string, error) {
  1349  	filter := ec2.NewFilter()
  1350  	e.addControllerFilter(filter, controllerUUID)
  1351  	return listVolumes(e.ec2, ctx, filter, includeRootDisks)
  1352  }
  1353  
  1354  func (e *environ) allModelVolumes(ctx context.ProviderCallContext, includeRootDisks bool) ([]string, error) {
  1355  	filter := ec2.NewFilter()
  1356  	e.addModelFilter(filter)
  1357  	return listVolumes(e.ec2, ctx, filter, includeRootDisks)
  1358  }
  1359  
  1360  func rulesToIPPerms(rules []network.IngressRule) []ec2.IPPerm {
  1361  	ipPerms := make([]ec2.IPPerm, len(rules))
  1362  	for i, r := range rules {
  1363  		ipPerms[i] = ec2.IPPerm{
  1364  			Protocol: r.Protocol,
  1365  			FromPort: r.FromPort,
  1366  			ToPort:   r.ToPort,
  1367  		}
  1368  		if len(r.SourceCIDRs) == 0 {
  1369  			ipPerms[i].SourceIPs = []string{defaultRouteCIDRBlock}
  1370  		} else {
  1371  			ipPerms[i].SourceIPs = make([]string, len(r.SourceCIDRs))
  1372  			copy(ipPerms[i].SourceIPs, r.SourceCIDRs)
  1373  		}
  1374  	}
  1375  	return ipPerms
  1376  }
  1377  
  1378  func (e *environ) openPortsInGroup(ctx context.ProviderCallContext, name string, rules []network.IngressRule) error {
  1379  	if len(rules) == 0 {
  1380  		return nil
  1381  	}
  1382  	// Give permissions for anyone to access the given ports.
  1383  	g, err := e.groupByName(ctx, name)
  1384  	if err != nil {
  1385  		return err
  1386  	}
  1387  	ipPerms := rulesToIPPerms(rules)
  1388  	_, err = e.ec2.AuthorizeSecurityGroup(g, ipPerms)
  1389  	if err != nil && ec2ErrCode(err) == "InvalidPermission.Duplicate" {
  1390  		if len(rules) == 1 {
  1391  			return nil
  1392  		}
  1393  		// If there's more than one port and we get a duplicate error,
  1394  		// then we go through authorizing each port individually,
  1395  		// otherwise the ports that were *not* duplicates will have
  1396  		// been ignored
  1397  		for i := range ipPerms {
  1398  			_, err := e.ec2.AuthorizeSecurityGroup(g, ipPerms[i:i+1])
  1399  			if err != nil && ec2ErrCode(err) != "InvalidPermission.Duplicate" {
  1400  				return errors.Annotatef(maybeConvertCredentialError(err, ctx), "cannot open port %v", ipPerms[i])
  1401  			}
  1402  		}
  1403  		return nil
  1404  	}
  1405  	if err != nil {
  1406  		return errors.Annotate(maybeConvertCredentialError(err, ctx), "cannot open ports")
  1407  	}
  1408  	return nil
  1409  }
  1410  
  1411  func (e *environ) closePortsInGroup(ctx context.ProviderCallContext, name string, rules []network.IngressRule) error {
  1412  	if len(rules) == 0 {
  1413  		return nil
  1414  	}
  1415  	// Revoke permissions for anyone to access the given ports.
  1416  	// Note that ec2 allows the revocation of permissions that aren't
  1417  	// granted, so this is naturally idempotent.
  1418  	g, err := e.groupByName(ctx, name)
  1419  	if err != nil {
  1420  		return err
  1421  	}
  1422  	_, err = e.ec2.RevokeSecurityGroup(g, rulesToIPPerms(rules))
  1423  	if err != nil {
  1424  		return errors.Annotate(maybeConvertCredentialError(err, ctx), "cannot close ports")
  1425  	}
  1426  	return nil
  1427  }
  1428  
  1429  func (e *environ) ingressRulesInGroup(ctx context.ProviderCallContext, name string) (rules []network.IngressRule, err error) {
  1430  	group, err := e.groupInfoByName(ctx, name)
  1431  	if err != nil {
  1432  		return nil, err
  1433  	}
  1434  	for _, p := range group.IPPerms {
  1435  		ips := p.SourceIPs
  1436  		if len(ips) == 0 {
  1437  			ips = []string{defaultRouteCIDRBlock}
  1438  		}
  1439  		rule, err := network.NewIngressRule(p.Protocol, p.FromPort, p.ToPort, ips...)
  1440  		if err != nil {
  1441  			return nil, errors.Trace(err)
  1442  		}
  1443  		rules = append(rules, rule)
  1444  	}
  1445  	network.SortIngressRules(rules)
  1446  	return rules, nil
  1447  }
  1448  
  1449  func (e *environ) OpenPorts(ctx context.ProviderCallContext, rules []network.IngressRule) error {
  1450  	if e.Config().FirewallMode() != config.FwGlobal {
  1451  		return errors.Errorf("invalid firewall mode %q for opening ports on model", e.Config().FirewallMode())
  1452  	}
  1453  	if err := e.openPortsInGroup(ctx, e.globalGroupName(), rules); err != nil {
  1454  		return errors.Trace(err)
  1455  	}
  1456  	logger.Infof("opened ports in global group: %v", rules)
  1457  	return nil
  1458  }
  1459  
  1460  func (e *environ) ClosePorts(ctx context.ProviderCallContext, rules []network.IngressRule) error {
  1461  	if e.Config().FirewallMode() != config.FwGlobal {
  1462  		return errors.Errorf("invalid firewall mode %q for closing ports on model", e.Config().FirewallMode())
  1463  	}
  1464  	if err := e.closePortsInGroup(ctx, e.globalGroupName(), rules); err != nil {
  1465  		return errors.Trace(err)
  1466  	}
  1467  	logger.Infof("closed ports in global group: %v", rules)
  1468  	return nil
  1469  }
  1470  
  1471  func (e *environ) IngressRules(ctx context.ProviderCallContext) ([]network.IngressRule, error) {
  1472  	if e.Config().FirewallMode() != config.FwGlobal {
  1473  		return nil, errors.Errorf("invalid firewall mode %q for retrieving ingress rules from model", e.Config().FirewallMode())
  1474  	}
  1475  	return e.ingressRulesInGroup(ctx, e.globalGroupName())
  1476  }
  1477  
  1478  func (*environ) Provider() environs.EnvironProvider {
  1479  	return &providerInstance
  1480  }
  1481  
  1482  func (e *environ) instanceSecurityGroups(ctx context.ProviderCallContext, instIDs []instance.Id, states ...string) ([]ec2.SecurityGroup, error) {
  1483  	strInstID := make([]string, len(instIDs))
  1484  	for i := range instIDs {
  1485  		strInstID[i] = string(instIDs[i])
  1486  	}
  1487  
  1488  	filter := ec2.NewFilter()
  1489  	if len(states) > 0 {
  1490  		filter.Add("instance-state-name", states...)
  1491  	}
  1492  
  1493  	resp, err := e.ec2.Instances(strInstID, filter)
  1494  	if err != nil {
  1495  		return nil, errors.Annotatef(maybeConvertCredentialError(err, ctx), "cannot retrieve instance information from aws to delete security groups")
  1496  	}
  1497  
  1498  	securityGroups := []ec2.SecurityGroup{}
  1499  	for _, res := range resp.Reservations {
  1500  		for _, inst := range res.Instances {
  1501  			logger.Debugf("instance %q has security groups %+v", inst.InstanceId, inst.SecurityGroups)
  1502  			securityGroups = append(securityGroups, inst.SecurityGroups...)
  1503  		}
  1504  	}
  1505  	return securityGroups, nil
  1506  }
  1507  
  1508  // controllerSecurityGroups returns the details of all security groups managed
  1509  // by the environment's controller.
  1510  func (e *environ) controllerSecurityGroups(ctx context.ProviderCallContext, controllerUUID string) ([]ec2.SecurityGroup, error) {
  1511  	filter := ec2.NewFilter()
  1512  	e.addControllerFilter(filter, controllerUUID)
  1513  	resp, err := e.ec2.SecurityGroups(nil, filter)
  1514  	if err != nil {
  1515  		return nil, errors.Annotate(maybeConvertCredentialError(err, ctx), "listing security groups")
  1516  	}
  1517  	groups := make([]ec2.SecurityGroup, len(resp.Groups))
  1518  	for i, info := range resp.Groups {
  1519  		groups[i] = ec2.SecurityGroup{Id: info.Id, Name: info.Name}
  1520  	}
  1521  	return groups, nil
  1522  }
  1523  
  1524  func (e *environ) modelSecurityGroupIDs(ctx context.ProviderCallContext) ([]string, error) {
  1525  	filter := ec2.NewFilter()
  1526  	e.addModelFilter(filter)
  1527  	resp, err := e.ec2.SecurityGroups(nil, filter)
  1528  	if err != nil {
  1529  		return nil, errors.Annotate(maybeConvertCredentialError(err, ctx), "listing security groups")
  1530  	}
  1531  	groupIDs := make([]string, len(resp.Groups))
  1532  	for i, info := range resp.Groups {
  1533  		groupIDs[i] = info.Id
  1534  	}
  1535  	return groupIDs, nil
  1536  }
  1537  
  1538  // cleanEnvironmentSecurityGroups attempts to delete all security groups owned
  1539  // by the environment.
  1540  func (e *environ) cleanEnvironmentSecurityGroups(ctx context.ProviderCallContext) error {
  1541  	jujuGroup := e.jujuGroupName()
  1542  	g, err := e.groupByName(ctx, jujuGroup)
  1543  	if isNotFoundError(err) {
  1544  		return nil
  1545  	}
  1546  	if err != nil {
  1547  		return errors.Annotatef(err, "cannot retrieve default security group: %q", jujuGroup)
  1548  	}
  1549  	if err := deleteSecurityGroupInsistently(e.ec2, ctx, g, clock.WallClock); err != nil {
  1550  		return errors.Annotate(err, "cannot delete default security group")
  1551  	}
  1552  	return nil
  1553  }
  1554  
  1555  func (e *environ) terminateInstances(ctx context.ProviderCallContext, ids []instance.Id) error {
  1556  	if len(ids) == 0 {
  1557  		return nil
  1558  	}
  1559  
  1560  	// TODO (anastasiamac 2016-04-11) Err if instances still have resources hanging around.
  1561  	// LP#1568654
  1562  	defer func() {
  1563  		e.deleteSecurityGroupsForInstances(ctx, ids)
  1564  	}()
  1565  
  1566  	// TODO (anastasiamac 2016-04-7) instance termination would benefit
  1567  	// from retry with exponential delay just like security groups
  1568  	// in defer. Bug#1567179.
  1569  	var err error
  1570  	for a := shortAttempt.Start(); a.Next(); {
  1571  		_, err = terminateInstancesById(e.ec2, ctx, ids...)
  1572  		if err == nil || ec2ErrCode(err) != "InvalidInstanceID.NotFound" {
  1573  			// This will return either success at terminating all instances (1st condition) or
  1574  			// encountered error as long as it's not NotFound (2nd condition).
  1575  			return maybeConvertCredentialError(err, ctx)
  1576  		}
  1577  	}
  1578  
  1579  	// We will get here only if we got a NotFound error.
  1580  	// 1. If we attempted to terminate only one instance was, return now.
  1581  	if len(ids) == 1 {
  1582  		ids = nil
  1583  		return nil
  1584  	}
  1585  	// 2. If we attempted to terminate several instances and got a NotFound error,
  1586  	// it means that no instances were terminated.
  1587  	// So try each instance individually, ignoring a NotFound error this time.
  1588  	deletedIDs := []instance.Id{}
  1589  	for _, id := range ids {
  1590  		_, err = terminateInstancesById(e.ec2, ctx, id)
  1591  		if err == nil {
  1592  			deletedIDs = append(deletedIDs, id)
  1593  		}
  1594  		if err != nil && ec2ErrCode(err) != "InvalidInstanceID.NotFound" {
  1595  			ids = deletedIDs
  1596  			return err
  1597  		}
  1598  	}
  1599  	// We will get here if all of the instances are deleted successfully,
  1600  	// or are not found, which implies they were previously deleted.
  1601  	ids = deletedIDs
  1602  	return nil
  1603  }
  1604  
  1605  var terminateInstancesById = func(ec2inst *ec2.EC2, ctx context.ProviderCallContext, ids ...instance.Id) (*ec2.TerminateInstancesResp, error) {
  1606  	strs := make([]string, len(ids))
  1607  	for i, id := range ids {
  1608  		strs[i] = string(id)
  1609  	}
  1610  	r, err := ec2inst.TerminateInstances(strs)
  1611  	if err != nil {
  1612  		return nil, maybeConvertCredentialError(err, ctx)
  1613  	}
  1614  	return r, nil
  1615  }
  1616  
  1617  func (e *environ) deleteSecurityGroupsForInstances(ctx context.ProviderCallContext, ids []instance.Id) {
  1618  	if len(ids) == 0 {
  1619  		logger.Debugf("no need to delete security groups: no intances were terminated successfully")
  1620  		return
  1621  	}
  1622  
  1623  	// We only want to attempt deleting security groups for the
  1624  	// instances that have been successfully terminated.
  1625  	securityGroups, err := e.instanceSecurityGroups(ctx, ids, "shutting-down", "terminated")
  1626  	if err != nil {
  1627  		logger.Errorf("cannot determine security groups to delete: %v", err)
  1628  		return
  1629  	}
  1630  
  1631  	// TODO(perrito666) we need to tag global security groups to be able
  1632  	// to tell them apart from future groups that are neither machine
  1633  	// nor environment group.
  1634  	// https://bugs.launchpad.net/juju-core/+bug/1534289
  1635  	jujuGroup := e.jujuGroupName()
  1636  
  1637  	for _, deletable := range securityGroups {
  1638  		if deletable.Name == jujuGroup {
  1639  			continue
  1640  		}
  1641  		if err := deleteSecurityGroupInsistently(e.ec2, ctx, deletable, clock.WallClock); err != nil {
  1642  			// In ideal world, we would err out here.
  1643  			// However:
  1644  			// 1. We do not know if all instances have been terminated.
  1645  			// If some instances erred out, they may still be using this security group.
  1646  			// In this case, our failure to delete security group is reasonable: it's still in use.
  1647  			// 2. Some security groups may be shared by multiple instances,
  1648  			// for example, global firewalling. We should not delete these.
  1649  			logger.Errorf("provider failure: %v", err)
  1650  		}
  1651  	}
  1652  }
  1653  
// SecurityGroupCleaner defines provider instance methods needed to delete
// a security group. deleteSecurityGroupInsistently accepts this interface
// instead of a concrete EC2 client.
type SecurityGroupCleaner interface {

	// DeleteSecurityGroup deletes security group on the provider.
	// It returns the provider's response together with any error.
	DeleteSecurityGroup(group ec2.SecurityGroup) (resp *ec2.SimpleResp, err error)
}
  1661  
  1662  var deleteSecurityGroupInsistently = func(inst SecurityGroupCleaner, ctx context.ProviderCallContext, group ec2.SecurityGroup, clock clock.Clock) error {
  1663  	err := retry.Call(retry.CallArgs{
  1664  		Attempts:    30,
  1665  		Delay:       time.Second,
  1666  		MaxDelay:    time.Minute, // because 2**29 seconds is beyond reasonable
  1667  		BackoffFunc: retry.DoubleDelay,
  1668  		Clock:       clock,
  1669  		Func: func() error {
  1670  			_, err := inst.DeleteSecurityGroup(group)
  1671  			if err == nil || isNotFoundError(err) {
  1672  				logger.Debugf("deleting security group %q", group.Name)
  1673  				return nil
  1674  			}
  1675  			return errors.Trace(maybeConvertCredentialError(err, ctx))
  1676  		},
  1677  		IsFatalError: func(err error) bool {
  1678  			return common.IsCredentialNotValid(err)
  1679  		},
  1680  		NotifyFunc: func(err error, attempt int) {
  1681  			logger.Debugf("deleting security group %q, attempt %d", group.Name, attempt)
  1682  		},
  1683  	})
  1684  	if err != nil {
  1685  		return errors.Annotatef(err, "cannot delete security group %q: consider deleting it manually", group.Name)
  1686  	}
  1687  	return nil
  1688  }
  1689  
  1690  func (e *environ) addModelFilter(f *ec2.Filter) {
  1691  	f.Add(fmt.Sprintf("tag:%s", tags.JujuModel), e.uuid())
  1692  }
  1693  
  1694  func (e *environ) addControllerFilter(f *ec2.Filter, controllerUUID string) {
  1695  	f.Add(fmt.Sprintf("tag:%s", tags.JujuController), controllerUUID)
  1696  }
  1697  
  1698  func (e *environ) uuid() string {
  1699  	return e.Config().UUID()
  1700  }
  1701  
  1702  func (e *environ) globalGroupName() string {
  1703  	return fmt.Sprintf("%s-global", e.jujuGroupName())
  1704  }
  1705  
  1706  func (e *environ) machineGroupName(machineId string) string {
  1707  	return fmt.Sprintf("%s-%s", e.jujuGroupName(), machineId)
  1708  }
  1709  
  1710  func (e *environ) jujuGroupName() string {
  1711  	return "juju-" + e.uuid()
  1712  }
  1713  
  1714  // setUpGroups creates the security groups for the new machine, and
  1715  // returns them.
  1716  //
  1717  // Instances are tagged with a group so they can be distinguished from
  1718  // other instances that might be running on the same EC2 account.  In
  1719  // addition, a specific machine security group is created for each
  1720  // machine, so that its firewall rules can be configured per machine.
  1721  func (e *environ) setUpGroups(ctx context.ProviderCallContext, controllerUUID, machineId string, apiPorts []int) ([]ec2.SecurityGroup, error) {
  1722  	perms := []ec2.IPPerm{{
  1723  		Protocol:  "tcp",
  1724  		FromPort:  22,
  1725  		ToPort:    22,
  1726  		SourceIPs: []string{"0.0.0.0/0"},
  1727  	}}
  1728  	for _, apiPort := range apiPorts {
  1729  		perms = append(perms, ec2.IPPerm{
  1730  			Protocol:  "tcp",
  1731  			FromPort:  apiPort,
  1732  			ToPort:    apiPort,
  1733  			SourceIPs: []string{"0.0.0.0/0"},
  1734  		})
  1735  	}
  1736  	perms = append(perms, ec2.IPPerm{
  1737  		Protocol: "tcp",
  1738  		FromPort: 0,
  1739  		ToPort:   65535,
  1740  	}, ec2.IPPerm{
  1741  		Protocol: "udp",
  1742  		FromPort: 0,
  1743  		ToPort:   65535,
  1744  	}, ec2.IPPerm{
  1745  		Protocol: "icmp",
  1746  		FromPort: -1,
  1747  		ToPort:   -1,
  1748  	})
  1749  	// Ensure there's a global group for Juju-related traffic.
  1750  	jujuGroup, err := e.ensureGroup(ctx, controllerUUID, e.jujuGroupName(), perms)
  1751  	if err != nil {
  1752  		return nil, err
  1753  	}
  1754  
  1755  	var machineGroup ec2.SecurityGroup
  1756  	switch e.Config().FirewallMode() {
  1757  	case config.FwInstance:
  1758  		machineGroup, err = e.ensureGroup(ctx, controllerUUID, e.machineGroupName(machineId), nil)
  1759  	case config.FwGlobal:
  1760  		machineGroup, err = e.ensureGroup(ctx, controllerUUID, e.globalGroupName(), nil)
  1761  	}
  1762  	if err != nil {
  1763  		return nil, err
  1764  	}
  1765  	return []ec2.SecurityGroup{jujuGroup, machineGroup}, nil
  1766  }
  1767  
// zeroGroup holds the zero security group. It is what ensureGroup returns
// in place of a real group on most of its error paths.
var zeroGroup ec2.SecurityGroup
  1770  
  1771  // securityGroupsByNameOrID calls ec2.SecurityGroups() either with the given
  1772  // groupName or with filter by vpc-id and group-name, depending on whether
  1773  // vpc-id is empty or not.
  1774  func (e *environ) securityGroupsByNameOrID(groupName string) (*ec2.SecurityGroupsResp, error) {
  1775  	if chosenVPCID := e.ecfg().vpcID(); isVPCIDSet(chosenVPCID) {
  1776  		// AWS VPC API requires both of these filters (and no
  1777  		// group names/ids set) for non-default EC2-VPC groups:
  1778  		filter := ec2.NewFilter()
  1779  		filter.Add("vpc-id", chosenVPCID)
  1780  		filter.Add("group-name", groupName)
  1781  		return e.ec2.SecurityGroups(nil, filter)
  1782  	}
  1783  
  1784  	// EC2-Classic or EC2-VPC with implicit default VPC need to use the
  1785  	// GroupName.X arguments instead of the filters.
  1786  	groups := ec2.SecurityGroupNames(groupName)
  1787  	return e.ec2.SecurityGroups(groups, nil)
  1788  }
  1789  
  1790  // ensureGroup returns the security group with name and perms.
  1791  // If a group with name does not exist, one will be created.
  1792  // If it exists, its permissions are set to perms.
  1793  // Any entries in perms without SourceIPs will be granted for
  1794  // the named group only.
  1795  func (e *environ) ensureGroup(ctx context.ProviderCallContext, controllerUUID, name string, perms []ec2.IPPerm) (g ec2.SecurityGroup, err error) {
  1796  	// Due to parallelization of the provisioner, it's possible that we try
  1797  	// to create the model security group a second time before the first time
  1798  	// is complete causing failures.
  1799  	e.ensureGroupMutex.Lock()
  1800  	defer e.ensureGroupMutex.Unlock()
  1801  
  1802  	// Specify explicit VPC ID if needed (not for default VPC or EC2-classic).
  1803  	chosenVPCID := e.ecfg().vpcID()
  1804  	inVPCLogSuffix := fmt.Sprintf(" (in VPC %q)", chosenVPCID)
  1805  	if !isVPCIDSet(chosenVPCID) {
  1806  		chosenVPCID = ""
  1807  		inVPCLogSuffix = ""
  1808  	}
  1809  
  1810  	resp, err := e.ec2.CreateSecurityGroup(chosenVPCID, name, "juju group")
  1811  	if err != nil && ec2ErrCode(err) != "InvalidGroup.Duplicate" {
  1812  		err = errors.Annotatef(maybeConvertCredentialError(err, ctx), "creating security group %q%s", name, inVPCLogSuffix)
  1813  		return zeroGroup, err
  1814  	}
  1815  
  1816  	var have permSet
  1817  	if err == nil {
  1818  		g = resp.SecurityGroup
  1819  		// Tag the created group with the model and controller UUIDs.
  1820  		cfg := e.Config()
  1821  		tags := tags.ResourceTags(
  1822  			names.NewModelTag(cfg.UUID()),
  1823  			names.NewControllerTag(controllerUUID),
  1824  			cfg,
  1825  		)
  1826  		if err := tagResources(e.ec2, ctx, tags, g.Id); err != nil {
  1827  			return g, errors.Annotate(err, "tagging security group")
  1828  		}
  1829  		logger.Debugf("created security group %q with ID %q%s", name, g.Id, inVPCLogSuffix)
  1830  	} else {
  1831  		resp, err := e.securityGroupsByNameOrID(name)
  1832  		if err != nil {
  1833  			return zeroGroup, errors.Annotatef(maybeConvertCredentialError(err, ctx), "fetching security group %q%s", name, inVPCLogSuffix)
  1834  		}
  1835  		if len(resp.Groups) == 0 {
  1836  			return zeroGroup, errors.NotFoundf("security group %q%s", name, inVPCLogSuffix)
  1837  		}
  1838  		info := resp.Groups[0]
  1839  		// It's possible that the old group has the wrong
  1840  		// description here, but if it does it's probably due
  1841  		// to something deliberately playing games with juju,
  1842  		// so we ignore it.
  1843  		g = info.SecurityGroup
  1844  		have = newPermSetForGroup(info.IPPerms, g)
  1845  	}
  1846  
  1847  	want := newPermSetForGroup(perms, g)
  1848  	revoke := make(permSet)
  1849  	for p := range have {
  1850  		if !want[p] {
  1851  			revoke[p] = true
  1852  		}
  1853  	}
  1854  	if len(revoke) > 0 {
  1855  		_, err := e.ec2.RevokeSecurityGroup(g, revoke.ipPerms())
  1856  		if err != nil {
  1857  			return zeroGroup, errors.Annotatef(maybeConvertCredentialError(err, ctx), "revoking security group %q%s", g.Id, inVPCLogSuffix)
  1858  		}
  1859  	}
  1860  
  1861  	add := make(permSet)
  1862  	for p := range want {
  1863  		if !have[p] {
  1864  			add[p] = true
  1865  		}
  1866  	}
  1867  	if len(add) > 0 {
  1868  		_, err := e.ec2.AuthorizeSecurityGroup(g, add.ipPerms())
  1869  		if err != nil {
  1870  			return zeroGroup, errors.Annotatef(maybeConvertCredentialError(err, ctx), "authorizing security group %q%s", g.Id, inVPCLogSuffix)
  1871  		}
  1872  	}
  1873  	return g, nil
  1874  }
  1875  
  1876  // permKey represents a permission for a group or an ip address range to access
  1877  // the given range of ports. Only one of groupId or ipAddr should be non-empty.
  1878  type permKey struct {
  1879  	protocol string
  1880  	fromPort int
  1881  	toPort   int
  1882  	groupId  string
  1883  	ipAddr   string
  1884  }
  1885  
// permSet is a set of permissions keyed by permKey; members are stored
// with the value true.
type permSet map[permKey]bool
  1887  
  1888  // newPermSetForGroup returns a set of all the permissions in the
  1889  // given slice of IPPerms. It ignores the name and owner
  1890  // id in source groups, and any entry with no source ips will
  1891  // be granted for the given group only.
  1892  func newPermSetForGroup(ps []ec2.IPPerm, group ec2.SecurityGroup) permSet {
  1893  	m := make(permSet)
  1894  	for _, p := range ps {
  1895  		k := permKey{
  1896  			protocol: p.Protocol,
  1897  			fromPort: p.FromPort,
  1898  			toPort:   p.ToPort,
  1899  		}
  1900  		if len(p.SourceIPs) > 0 {
  1901  			for _, ip := range p.SourceIPs {
  1902  				k.ipAddr = ip
  1903  				m[k] = true
  1904  			}
  1905  		} else {
  1906  			k.groupId = group.Id
  1907  			m[k] = true
  1908  		}
  1909  	}
  1910  	return m
  1911  }
  1912  
  1913  // ipPerms returns m as a slice of permissions usable
  1914  // with the ec2 package.
  1915  func (m permSet) ipPerms() (ps []ec2.IPPerm) {
  1916  	// We could compact the permissions, but it
  1917  	// hardly seems worth it.
  1918  	for p := range m {
  1919  		ipp := ec2.IPPerm{
  1920  			Protocol: p.protocol,
  1921  			FromPort: p.fromPort,
  1922  			ToPort:   p.toPort,
  1923  		}
  1924  		if p.ipAddr != "" {
  1925  			ipp.SourceIPs = []string{p.ipAddr}
  1926  		} else {
  1927  			ipp.SourceGroups = []ec2.UserSecurityGroup{{Id: p.groupId}}
  1928  		}
  1929  		ps = append(ps, ipp)
  1930  	}
  1931  	return
  1932  }
  1933  
  1934  func isZoneOrSubnetConstrainedError(err error) bool {
  1935  	return isZoneConstrainedError(err) || isSubnetConstrainedError(err)
  1936  }
  1937  
  1938  // isZoneConstrainedError reports whether or not the error indicates
  1939  // RunInstances failed due to the specified availability zone being
  1940  // constrained for the instance type being provisioned, or is
  1941  // otherwise unusable for the specific request made.
  1942  func isZoneConstrainedError(err error) bool {
  1943  	switch err := errors.Cause(err).(type) {
  1944  	case *ec2.Error:
  1945  		switch err.Code {
  1946  		case "Unsupported", "InsufficientInstanceCapacity":
  1947  			// A big hammer, but we've now seen several different error messages
  1948  			// for constrained zones, and who knows how many more there might
  1949  			// be. If the message contains "Availability Zone", it's a fair
  1950  			// bet that it's constrained or otherwise unusable.
  1951  			return strings.Contains(err.Message, "Availability Zone")
  1952  		case "InvalidInput":
  1953  			// If the region has a default VPC, then we will receive an error
  1954  			// if the AZ does not have a default subnet. Until we have proper
  1955  			// support for networks, we'll skip over these.
  1956  			return strings.HasPrefix(err.Message, "No default subnet for availability zone")
  1957  		case "VolumeTypeNotAvailableInZone":
  1958  			return true
  1959  		}
  1960  	}
  1961  	return false
  1962  }
  1963  
  1964  // isSubnetConstrainedError reports whether or not the error indicates
  1965  // RunInstances failed due to the specified VPC subnet ID being constrained for
  1966  // the instance type being provisioned, or is otherwise unusable for the
  1967  // specific request made.
  1968  func isSubnetConstrainedError(err error) bool {
  1969  	switch err := errors.Cause(err).(type) {
  1970  	case *ec2.Error:
  1971  		switch err.Code {
  1972  		case "InsufficientFreeAddressesInSubnet", "InsufficientInstanceCapacity":
  1973  			// Subnet and/or VPC general limits reached.
  1974  			return true
  1975  		case "InvalidSubnetID.NotFound":
  1976  			// This shouldn't happen, as we validate the subnet IDs, but it can
  1977  			// happen if the user manually deleted the subnet outside of Juju.
  1978  			return true
  1979  		}
  1980  	}
  1981  	return false
  1982  }
  1983  
  1984  // If the err is of type *ec2.Error, ec2ErrCode returns
  1985  // its code, otherwise it returns the empty string.
  1986  func ec2ErrCode(err error) string {
  1987  	ec2err, _ := errors.Cause(err).(*ec2.Error)
  1988  	if ec2err == nil {
  1989  		return ""
  1990  	}
  1991  	return ec2err.Code
  1992  }
  1993  
// AllocateContainerAddresses is not supported by this provider: it ignores
// its arguments and always returns a NotSupported error.
func (e *environ) AllocateContainerAddresses(ctx context.ProviderCallContext, hostInstanceID instance.Id, containerTag names.MachineTag, preparedInfo []network.InterfaceInfo) ([]network.InterfaceInfo, error) {
	return nil, errors.NotSupportedf("container address allocation")
}
  1997  
// ReleaseContainerAddresses is not supported by this provider: it ignores
// its arguments and always returns a NotSupported error.
func (e *environ) ReleaseContainerAddresses(ctx context.ProviderCallContext, interfaces []network.ProviderInterfaceInfo) error {
	return errors.NotSupportedf("container address allocation")
}
  2001  
  2002  func (e *environ) supportedInstanceTypes(ctx context.ProviderCallContext) ([]instances.InstanceType, error) {
  2003  	allInstanceTypes := ec2instancetypes.RegionInstanceTypes(e.cloud.Region)
  2004  	if isVPCIDSet(e.ecfg().vpcID()) {
  2005  		return allInstanceTypes, nil
  2006  	}
  2007  	hasDefaultVPC, err := e.hasDefaultVPC(ctx)
  2008  	if err != nil {
  2009  		return nil, errors.Trace(err)
  2010  	}
  2011  	if hasDefaultVPC {
  2012  		return allInstanceTypes, nil
  2013  	}
  2014  
  2015  	// The region has no default VPC, and the user has not specified
  2016  	// one to use. We filter out any instance types that are not
  2017  	// supported in EC2-Classic.
  2018  	supportedInstanceTypes := make([]instances.InstanceType, 0, len(allInstanceTypes))
  2019  	for _, instanceType := range allInstanceTypes {
  2020  		if !ec2instancetypes.SupportsClassic(instanceType.Name) {
  2021  			continue
  2022  		}
  2023  		supportedInstanceTypes = append(supportedInstanceTypes, instanceType)
  2024  	}
  2025  	return supportedInstanceTypes, nil
  2026  }
  2027  
  2028  func (e *environ) hasDefaultVPC(ctx context.ProviderCallContext) (bool, error) {
  2029  	e.defaultVPCMutex.Lock()
  2030  	defer e.defaultVPCMutex.Unlock()
  2031  	if !e.defaultVPCChecked {
  2032  		filter := ec2.NewFilter()
  2033  		filter.Add("isDefault", "true")
  2034  		resp, err := e.ec2.VPCs(nil, filter)
  2035  		if err != nil {
  2036  			return false, errors.Trace(maybeConvertCredentialError(err, ctx))
  2037  		}
  2038  		if len(resp.VPCs) > 0 {
  2039  			e.defaultVPC = &resp.VPCs[0]
  2040  		}
  2041  		e.defaultVPCChecked = true
  2042  	}
  2043  	return e.defaultVPC != nil, nil
  2044  }
  2045  
// ProviderSpaceInfo implements NetworkingEnviron.
// It is not supported by this provider and always returns a NotSupported
// error.
func (*environ) ProviderSpaceInfo(ctx context.ProviderCallContext, space *network.SpaceInfo) (*environs.ProviderSpaceInfo, error) {
	return nil, errors.NotSupportedf("provider space info")
}
  2050  
// AreSpacesRoutable implements NetworkingEnviron.
// It does not inspect either space and always reports false with a nil
// error.
func (*environ) AreSpacesRoutable(ctx context.ProviderCallContext, space1, space2 *environs.ProviderSpaceInfo) (bool, error) {
	return false, nil
}
  2055  
// SSHAddresses implements environs.SSHAddresses.
// It returns the given addresses unchanged; no filtering is applied.
func (*environ) SSHAddresses(ctx context.ProviderCallContext, addresses []network.Address) ([]network.Address, error) {
	return addresses, nil
}
  2060  
  2061  // SuperSubnets implements NetworkingEnviron.SuperSubnets
  2062  func (e *environ) SuperSubnets(ctx context.ProviderCallContext) ([]string, error) {
  2063  	vpcId := e.ecfg().vpcID()
  2064  	if !isVPCIDSet(vpcId) {
  2065  		if hasDefaultVPC, err := e.hasDefaultVPC(ctx); err == nil && hasDefaultVPC {
  2066  			vpcId = e.defaultVPC.Id
  2067  		}
  2068  	}
  2069  	if !isVPCIDSet(vpcId) {
  2070  		return nil, errors.NotSupportedf("Not a VPC environment")
  2071  	}
  2072  	cidr, err := getVPCCIDR(e.ec2, ctx, vpcId)
  2073  	if err != nil {
  2074  		return nil, err
  2075  	}
  2076  	return []string{cidr}, nil
  2077  }