sigs.k8s.io/cluster-api-provider-aws@v1.5.5/pkg/cloud/services/ec2/instances.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package ec2
    18  
    19  import (
    20  	"context"
    21  	"encoding/base64"
    22  	"fmt"
    23  	"sort"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/aws/aws-sdk-go/aws"
    28  	"github.com/aws/aws-sdk-go/aws/request"
    29  	"github.com/aws/aws-sdk-go/service/ec2"
    30  	"github.com/pkg/errors"
    31  	"k8s.io/utils/pointer"
    32  
    33  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    34  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/awserrors"
    35  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/converters"
    36  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/filter"
    37  	awslogs "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/logs"
    38  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope"
    39  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/userdata"
    40  	"sigs.k8s.io/cluster-api-provider-aws/pkg/record"
    41  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    42  	capierrors "sigs.k8s.io/cluster-api/errors"
    43  )
    44  
    45  // GetRunningInstanceByTags returns the existing instance or nothing if it doesn't exist.
    46  func (s *Service) GetRunningInstanceByTags(scope *scope.MachineScope) (*infrav1.Instance, error) {
    47  	s.scope.V(2).Info("Looking for existing machine instance by tags")
    48  
    49  	input := &ec2.DescribeInstancesInput{
    50  		Filters: []*ec2.Filter{
    51  			filter.EC2.VPC(s.scope.VPC().ID),
    52  			filter.EC2.ClusterOwned(s.scope.Name()),
    53  			filter.EC2.Name(scope.Name()),
    54  			filter.EC2.InstanceStates(ec2.InstanceStateNamePending, ec2.InstanceStateNameRunning),
    55  		},
    56  	}
    57  
    58  	out, err := s.EC2Client.DescribeInstances(input)
    59  	switch {
    60  	case awserrors.IsNotFound(err):
    61  		return nil, nil
    62  	case err != nil:
    63  		record.Eventf(s.scope.InfraCluster(), "FailedDescribeInstances", "Failed to describe instances by tags: %v", err)
    64  		return nil, errors.Wrap(err, "failed to describe instances by tags")
    65  	}
    66  
    67  	// TODO: currently just returns the first matched instance, need to
    68  	// better rationalize how to find the right instance to return if multiple
    69  	// match
    70  	for _, res := range out.Reservations {
    71  		for _, inst := range res.Instances {
    72  			return s.SDKToInstance(inst)
    73  		}
    74  	}
    75  
    76  	return nil, nil
    77  }
    78  
    79  // InstanceIfExists returns the existing instance by id and errors if it cannot find the instance(ErrInstanceNotFoundByID) or API call fails (ErrDescribeInstance).
    80  // Returns empty instance with nil error, only when providerID is nil.
    81  func (s *Service) InstanceIfExists(id *string) (*infrav1.Instance, error) {
    82  	if id == nil {
    83  		s.scope.Info("Instance does not have an instance id")
    84  		return nil, nil
    85  	}
    86  
    87  	s.scope.V(2).Info("Looking for instance by id", "instance-id", *id)
    88  
    89  	input := &ec2.DescribeInstancesInput{
    90  		InstanceIds: []*string{id},
    91  	}
    92  
    93  	out, err := s.EC2Client.DescribeInstances(input)
    94  	switch {
    95  	case awserrors.IsNotFound(err):
    96  		record.Eventf(s.scope.InfraCluster(), "FailedFindInstances", "failed to find instance by providerId %q: %v", *id, err)
    97  		return nil, ErrInstanceNotFoundByID
    98  	case err != nil:
    99  		record.Eventf(s.scope.InfraCluster(), "FailedDescribeInstances", "failed to describe instance %q: %v", *id, err)
   100  		return nil, ErrDescribeInstance
   101  	}
   102  
   103  	if len(out.Reservations) > 0 && len(out.Reservations[0].Instances) > 0 {
   104  		return s.SDKToInstance(out.Reservations[0].Instances[0])
   105  	} else {
   106  		// Failed to find instance with provider id.
   107  		record.Eventf(s.scope.InfraCluster(), "FailedFindInstances", "failed to find instance by providerId %q: %v", *id, err)
   108  		return nil, ErrInstanceNotFoundByID
   109  	}
   110  }
   111  
   112  // CreateInstance runs an ec2 instance.
   113  func (s *Service) CreateInstance(scope *scope.MachineScope, userData []byte, userDataFormat string) (*infrav1.Instance, error) {
   114  	s.scope.V(2).Info("Creating an instance for a machine")
   115  
   116  	input := &infrav1.Instance{
   117  		Type:              scope.AWSMachine.Spec.InstanceType,
   118  		IAMProfile:        scope.AWSMachine.Spec.IAMInstanceProfile,
   119  		RootVolume:        scope.AWSMachine.Spec.RootVolume.DeepCopy(),
   120  		NonRootVolumes:    scope.AWSMachine.Spec.NonRootVolumes,
   121  		NetworkInterfaces: scope.AWSMachine.Spec.NetworkInterfaces,
   122  	}
   123  
   124  	// Make sure to use the MachineScope here to get the merger of AWSCluster and AWSMachine tags
   125  	additionalTags := scope.AdditionalTags()
   126  	input.Tags = infrav1.Build(infrav1.BuildParams{
   127  		ClusterName: s.scope.KubernetesClusterName(),
   128  		Lifecycle:   infrav1.ResourceLifecycleOwned,
   129  		Name:        aws.String(scope.Name()),
   130  		Role:        aws.String(scope.Role()),
   131  		Additional:  additionalTags,
   132  	}.WithCloudProvider(s.scope.KubernetesClusterName()).WithMachineName(scope.Machine))
   133  
   134  	var err error
   135  	// Pick image from the machine configuration, or use a default one.
   136  	if scope.AWSMachine.Spec.AMI.ID != nil { // nolint:nestif
   137  		input.ImageID = *scope.AWSMachine.Spec.AMI.ID
   138  	} else {
   139  		if scope.Machine.Spec.Version == nil {
   140  			err := errors.New("Either AWSMachine's spec.ami.id or Machine's spec.version must be defined")
   141  			scope.SetFailureReason(capierrors.CreateMachineError)
   142  			scope.SetFailureMessage(err)
   143  			return nil, err
   144  		}
   145  
   146  		imageLookupFormat := scope.AWSMachine.Spec.ImageLookupFormat
   147  		if imageLookupFormat == "" {
   148  			imageLookupFormat = scope.InfraCluster.ImageLookupFormat()
   149  		}
   150  
   151  		imageLookupOrg := scope.AWSMachine.Spec.ImageLookupOrg
   152  		if imageLookupOrg == "" {
   153  			imageLookupOrg = scope.InfraCluster.ImageLookupOrg()
   154  		}
   155  
   156  		imageLookupBaseOS := scope.AWSMachine.Spec.ImageLookupBaseOS
   157  		if imageLookupBaseOS == "" {
   158  			imageLookupBaseOS = scope.InfraCluster.ImageLookupBaseOS()
   159  		}
   160  
   161  		if scope.IsEKSManaged() && imageLookupFormat == "" && imageLookupOrg == "" && imageLookupBaseOS == "" {
   162  			input.ImageID, err = s.eksAMILookup(*scope.Machine.Spec.Version, scope.AWSMachine.Spec.AMI.EKSOptimizedLookupType)
   163  			if err != nil {
   164  				return nil, err
   165  			}
   166  		} else {
   167  			input.ImageID, err = s.defaultAMIIDLookup(imageLookupFormat, imageLookupOrg, imageLookupBaseOS, *scope.Machine.Spec.Version)
   168  			if err != nil {
   169  				return nil, err
   170  			}
   171  		}
   172  	}
   173  
   174  	subnetID, err := s.findSubnet(scope)
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  	input.SubnetID = subnetID
   179  
   180  	if !scope.IsExternallyManaged() && !scope.IsEKSManaged() && s.scope.Network().APIServerELB.DNSName == "" {
   181  		record.Eventf(s.scope.InfraCluster(), "FailedCreateInstance", "Failed to run controlplane, APIServer ELB not available")
   182  
   183  		return nil, awserrors.NewFailedDependency("failed to run controlplane, APIServer ELB not available")
   184  	}
   185  
   186  	if scope.CompressUserData(userDataFormat) {
   187  		userData, err = userdata.GzipBytes(userData)
   188  		if err != nil {
   189  			return nil, errors.New("failed to gzip userdata")
   190  		}
   191  	}
   192  
   193  	input.UserData = pointer.StringPtr(base64.StdEncoding.EncodeToString(userData))
   194  
   195  	// Set security groups.
   196  	ids, err := s.GetCoreSecurityGroups(scope)
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  	input.SecurityGroupIDs = append(input.SecurityGroupIDs, ids...)
   201  
   202  	// If SSHKeyName WAS NOT provided in the AWSMachine Spec, fallback to the value provided in the AWSCluster Spec.
   203  	// If a value was not provided in the AWSCluster Spec, then use the defaultSSHKeyName
   204  	// Note that:
   205  	// - a nil AWSMachine.Spec.SSHKeyName value means use the AWSCluster.Spec.SSHKeyName SSH key name value
   206  	// - nil values for both AWSCluster.Spec.SSHKeyName and AWSMachine.Spec.SSHKeyName means use the default SSH key name value
   207  	// - an empty string means do not set an SSH key name at all
   208  	// - otherwise use the value specified in either AWSMachine or AWSCluster
   209  	var prioritizedSSHKeyName string
   210  	switch {
   211  	case scope.AWSMachine.Spec.SSHKeyName != nil:
   212  		// prefer AWSMachine.Spec.SSHKeyName if it is defined
   213  		prioritizedSSHKeyName = *scope.AWSMachine.Spec.SSHKeyName
   214  	case scope.InfraCluster.SSHKeyName() != nil:
   215  		// fallback to AWSCluster.Spec.SSHKeyName if it is defined
   216  		prioritizedSSHKeyName = *scope.InfraCluster.SSHKeyName()
   217  	default:
   218  		if !scope.IsExternallyManaged() {
   219  			prioritizedSSHKeyName = defaultSSHKeyName
   220  		}
   221  	}
   222  
   223  	// Only set input.SSHKeyName if the user did not explicitly request no ssh key be set (explicitly setting "" on either the Machine or related Cluster)
   224  	if prioritizedSSHKeyName != "" {
   225  		input.SSHKeyName = aws.String(prioritizedSSHKeyName)
   226  	}
   227  
   228  	input.SpotMarketOptions = scope.AWSMachine.Spec.SpotMarketOptions
   229  
   230  	input.Tenancy = scope.AWSMachine.Spec.Tenancy
   231  
   232  	s.scope.V(2).Info("Running instance", "machine-role", scope.Role())
   233  	out, err := s.runInstance(scope.Role(), input)
   234  	if err != nil {
   235  		// Only record the failure event if the error is not related to failed dependencies.
   236  		// This is to avoid spamming failure events since the machine will be requeued by the actuator.
   237  		if !awserrors.IsFailedDependency(errors.Cause(err)) {
   238  			record.Warnf(scope.AWSMachine, "FailedCreate", "Failed to create instance: %v", err)
   239  		}
   240  		return nil, err
   241  	}
   242  
   243  	if len(input.NetworkInterfaces) > 0 {
   244  		for _, id := range input.NetworkInterfaces {
   245  			s.scope.V(2).Info("Attaching security groups to provided network interface", "groups", input.SecurityGroupIDs, "interface", id)
   246  			if err := s.attachSecurityGroupsToNetworkInterface(input.SecurityGroupIDs, id); err != nil {
   247  				return nil, err
   248  			}
   249  		}
   250  	}
   251  
   252  	record.Eventf(scope.AWSMachine, "SuccessfulCreate", "Created new %s instance with id %q", scope.Role(), out.ID)
   253  	return out, nil
   254  }
   255  
   256  // findSubnet attempts to retrieve a subnet ID in the following order:
   257  // - subnetID specified in machine configuration,
   258  // - subnet based on filters in machine configuration
   259  // - subnet based on the availability zone specified,
   260  // - default to the first private subnet available.
   261  func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) {
   262  	// Check Machine.Spec.FailureDomain first as it's used by KubeadmControlPlane to spread machines across failure domains.
   263  	failureDomain := scope.Machine.Spec.FailureDomain
   264  	if failureDomain == nil {
   265  		failureDomain = scope.AWSMachine.Spec.FailureDomain
   266  	}
   267  
   268  	// We basically have 2 sources for subnets:
   269  	//   1. If subnet.id or subnet.filters are specified, we directly query AWS
   270  	//   2. All other cases use the subnets provided in the cluster network spec without ever calling AWS
   271  
   272  	switch {
   273  	case scope.AWSMachine.Spec.Subnet != nil && (scope.AWSMachine.Spec.Subnet.ID != nil || scope.AWSMachine.Spec.Subnet.Filters != nil):
   274  		criteria := []*ec2.Filter{
   275  			filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
   276  		}
   277  		if !scope.IsExternallyManaged() {
   278  			criteria = append(criteria, filter.EC2.VPC(s.scope.VPC().ID))
   279  		}
   280  		if scope.AWSMachine.Spec.Subnet.ID != nil {
   281  			criteria = append(criteria, &ec2.Filter{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{*scope.AWSMachine.Spec.Subnet.ID})})
   282  		}
   283  		for _, f := range scope.AWSMachine.Spec.Subnet.Filters {
   284  			criteria = append(criteria, &ec2.Filter{Name: aws.String(f.Name), Values: aws.StringSlice(f.Values)})
   285  		}
   286  
   287  		subnets, err := s.getFilteredSubnets(criteria...)
   288  		if err != nil {
   289  			return "", errors.Wrapf(err, "failed to filter subnets for criteria %q", criteria)
   290  		}
   291  		if len(subnets) == 0 {
   292  			errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching criteria %q",
   293  				scope.Name(), criteria)
   294  			record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
   295  			return "", awserrors.NewFailedDependency(errMessage)
   296  		}
   297  
   298  		var filtered []*ec2.Subnet
   299  		var errMessage string
   300  		for _, subnet := range subnets {
   301  			if failureDomain != nil && *subnet.AvailabilityZone != *failureDomain {
   302  				// we could have included the failure domain in the query criteria, but then we end up with EC2 error
   303  				// messages that don't give a good hint about what is really wrong
   304  				errMessage += fmt.Sprintf(" subnet %q availability zone %q does not match failure domain %q.",
   305  					*subnet.SubnetId, *subnet.AvailabilityZone, *failureDomain)
   306  				continue
   307  			}
   308  			if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP && !*subnet.MapPublicIpOnLaunch {
   309  				errMessage += fmt.Sprintf(" subnet %q is a private subnet.", *subnet.SubnetId)
   310  				continue
   311  			}
   312  			filtered = append(filtered, subnet)
   313  		}
   314  		if len(filtered) == 0 {
   315  			errMessage = fmt.Sprintf("failed to run machine %q, found %d subnets matching criteria but post-filtering failed.",
   316  				scope.Name(), len(subnets)) + errMessage
   317  			record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
   318  			return "", awserrors.NewFailedDependency(errMessage)
   319  		}
   320  		return *filtered[0].SubnetId, nil
   321  	case failureDomain != nil:
   322  		if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
   323  			subnets := s.scope.Subnets().FilterPublic().FilterByZone(*failureDomain)
   324  			if len(subnets) == 0 {
   325  				errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available in availability zone %q",
   326  					scope.Name(), *failureDomain)
   327  				record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
   328  				return "", awserrors.NewFailedDependency(errMessage)
   329  			}
   330  			return subnets[0].ID, nil
   331  		}
   332  
   333  		subnets := s.scope.Subnets().FilterPrivate().FilterByZone(*failureDomain)
   334  		if len(subnets) == 0 {
   335  			errMessage := fmt.Sprintf("failed to run machine %q, no subnets available in availability zone %q",
   336  				scope.Name(), *failureDomain)
   337  			record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
   338  			return "", awserrors.NewFailedDependency(errMessage)
   339  		}
   340  		return subnets[0].ID, nil
   341  	case scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP:
   342  		subnets := s.scope.Subnets().FilterPublic()
   343  		if len(subnets) == 0 {
   344  			errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available", scope.Name())
   345  			record.Eventf(scope.AWSMachine, "FailedCreate", errMessage)
   346  			return "", awserrors.NewFailedDependency(errMessage)
   347  		}
   348  		return subnets[0].ID, nil
   349  
   350  		// TODO(vincepri): Define a tag that would allow to pick a preferred subnet in an AZ when working
   351  		// with control plane machines.
   352  
   353  	default:
   354  		sns := s.scope.Subnets().FilterPrivate()
   355  		if len(sns) == 0 {
   356  			errMessage := fmt.Sprintf("failed to run machine %q, no subnets available", scope.Name())
   357  			record.Eventf(s.scope.InfraCluster(), "FailedCreateInstance", errMessage)
   358  			return "", awserrors.NewFailedDependency(errMessage)
   359  		}
   360  		return sns[0].ID, nil
   361  	}
   362  }
   363  
   364  // getFilteredSubnets fetches subnets filtered based on the criteria passed.
   365  func (s *Service) getFilteredSubnets(criteria ...*ec2.Filter) ([]*ec2.Subnet, error) {
   366  	out, err := s.EC2Client.DescribeSubnets(&ec2.DescribeSubnetsInput{Filters: criteria})
   367  	if err != nil {
   368  		return nil, err
   369  	}
   370  	return out.Subnets, nil
   371  }
   372  
   373  // GetCoreSecurityGroups looks up the security group IDs managed by this actuator
   374  // They are considered "core" to its proper functioning.
   375  func (s *Service) GetCoreSecurityGroups(scope *scope.MachineScope) ([]string, error) {
   376  	if scope.IsExternallyManaged() {
   377  		return nil, nil
   378  	}
   379  
   380  	// These are common across both controlplane and node machines
   381  	sgRoles := []infrav1.SecurityGroupRole{
   382  		infrav1.SecurityGroupNode,
   383  	}
   384  
   385  	if !scope.IsEKSManaged() {
   386  		sgRoles = append(sgRoles, infrav1.SecurityGroupLB)
   387  	}
   388  
   389  	switch scope.Role() {
   390  	case "node":
   391  		// Just the common security groups above
   392  		if scope.IsEKSManaged() {
   393  			sgRoles = append(sgRoles, infrav1.SecurityGroupEKSNodeAdditional)
   394  		}
   395  	case "control-plane":
   396  		sgRoles = append(sgRoles, infrav1.SecurityGroupControlPlane)
   397  	default:
   398  		return nil, errors.Errorf("Unknown node role %q", scope.Role())
   399  	}
   400  	ids := make([]string, 0, len(sgRoles))
   401  	for _, sg := range sgRoles {
   402  		if _, ok := s.scope.SecurityGroups()[sg]; !ok {
   403  			return nil, awserrors.NewFailedDependency(fmt.Sprintf("%s security group not available", sg))
   404  		}
   405  		ids = append(ids, s.scope.SecurityGroups()[sg].ID)
   406  	}
   407  	return ids, nil
   408  }
   409  
   410  // GetCoreNodeSecurityGroups looks up the security group IDs managed by this actuator
   411  // They are considered "core" to its proper functioning.
   412  func (s *Service) GetCoreNodeSecurityGroups(scope *scope.MachinePoolScope) ([]string, error) {
   413  	// These are common across both controlplane and node machines
   414  	sgRoles := []infrav1.SecurityGroupRole{
   415  		infrav1.SecurityGroupNode,
   416  	}
   417  
   418  	if !scope.IsEKSManaged() {
   419  		sgRoles = append(sgRoles, infrav1.SecurityGroupLB)
   420  	} else {
   421  		sgRoles = append(sgRoles, infrav1.SecurityGroupEKSNodeAdditional)
   422  	}
   423  
   424  	ids := make([]string, 0, len(sgRoles))
   425  	for _, sg := range sgRoles {
   426  		if _, ok := s.scope.SecurityGroups()[sg]; !ok {
   427  			return nil, awserrors.NewFailedDependency(
   428  				fmt.Sprintf("%s security group not available", sg),
   429  			)
   430  		}
   431  		ids = append(ids, s.scope.SecurityGroups()[sg].ID)
   432  	}
   433  	return ids, nil
   434  }
   435  
   436  // TerminateInstance terminates an EC2 instance.
   437  // Returns nil on success, error in all other cases.
   438  func (s *Service) TerminateInstance(instanceID string) error {
   439  	s.scope.V(2).Info("Attempting to terminate instance", "instance-id", instanceID)
   440  
   441  	input := &ec2.TerminateInstancesInput{
   442  		InstanceIds: aws.StringSlice([]string{instanceID}),
   443  	}
   444  
   445  	if _, err := s.EC2Client.TerminateInstances(input); err != nil {
   446  		return errors.Wrapf(err, "failed to terminate instance with id %q", instanceID)
   447  	}
   448  
   449  	s.scope.V(2).Info("Terminated instance", "instance-id", instanceID)
   450  	return nil
   451  }
   452  
   453  // TerminateInstanceAndWait terminates and waits
   454  // for an EC2 instance to terminate.
   455  func (s *Service) TerminateInstanceAndWait(instanceID string) error {
   456  	if err := s.TerminateInstance(instanceID); err != nil {
   457  		return err
   458  	}
   459  
   460  	s.scope.V(2).Info("Waiting for EC2 instance to terminate", "instance-id", instanceID)
   461  
   462  	input := &ec2.DescribeInstancesInput{
   463  		InstanceIds: aws.StringSlice([]string{instanceID}),
   464  	}
   465  
   466  	if err := s.EC2Client.WaitUntilInstanceTerminated(input); err != nil {
   467  		return errors.Wrapf(err, "failed to wait for instance %q termination", instanceID)
   468  	}
   469  
   470  	return nil
   471  }
   472  
   473  func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instance, error) {
   474  	input := &ec2.RunInstancesInput{
   475  		InstanceType: aws.String(i.Type),
   476  		ImageId:      aws.String(i.ImageID),
   477  		KeyName:      i.SSHKeyName,
   478  		EbsOptimized: i.EBSOptimized,
   479  		MaxCount:     aws.Int64(1),
   480  		MinCount:     aws.Int64(1),
   481  		UserData:     i.UserData,
   482  	}
   483  
   484  	s.scope.V(2).Info("userData size", "bytes", len(*i.UserData), "role", role)
   485  
   486  	if len(i.NetworkInterfaces) > 0 {
   487  		netInterfaces := make([]*ec2.InstanceNetworkInterfaceSpecification, 0, len(i.NetworkInterfaces))
   488  
   489  		for index, id := range i.NetworkInterfaces {
   490  			netInterfaces = append(netInterfaces, &ec2.InstanceNetworkInterfaceSpecification{
   491  				NetworkInterfaceId: aws.String(id),
   492  				DeviceIndex:        aws.Int64(int64(index)),
   493  			})
   494  		}
   495  
   496  		input.NetworkInterfaces = netInterfaces
   497  	} else {
   498  		input.SubnetId = aws.String(i.SubnetID)
   499  
   500  		if len(i.SecurityGroupIDs) > 0 {
   501  			input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs)
   502  		}
   503  	}
   504  
   505  	if i.IAMProfile != "" {
   506  		input.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{
   507  			Name: aws.String(i.IAMProfile),
   508  		}
   509  	}
   510  
   511  	blockdeviceMappings := []*ec2.BlockDeviceMapping{}
   512  
   513  	if i.RootVolume != nil {
   514  		rootDeviceName, err := s.checkRootVolume(i.RootVolume, i.ImageID)
   515  		if err != nil {
   516  			return nil, err
   517  		}
   518  
   519  		i.RootVolume.DeviceName = aws.StringValue(rootDeviceName)
   520  		blockDeviceMapping := volumeToBlockDeviceMapping(i.RootVolume)
   521  		blockdeviceMappings = append(blockdeviceMappings, blockDeviceMapping)
   522  	}
   523  
   524  	for vi := range i.NonRootVolumes {
   525  		nonRootVolume := i.NonRootVolumes[vi]
   526  
   527  		if nonRootVolume.DeviceName == "" {
   528  			return nil, errors.Errorf("non root volume should have device name specified")
   529  		}
   530  
   531  		blockDeviceMapping := volumeToBlockDeviceMapping(&nonRootVolume)
   532  		blockdeviceMappings = append(blockdeviceMappings, blockDeviceMapping)
   533  	}
   534  
   535  	if len(blockdeviceMappings) != 0 {
   536  		input.BlockDeviceMappings = blockdeviceMappings
   537  	}
   538  
   539  	if len(i.Tags) > 0 {
   540  		spec := &ec2.TagSpecification{ResourceType: aws.String(ec2.ResourceTypeInstance)}
   541  		// We need to sort keys for tests to work
   542  		keys := make([]string, 0, len(i.Tags))
   543  		for k := range i.Tags {
   544  			keys = append(keys, k)
   545  		}
   546  		sort.Strings(keys)
   547  		for _, key := range keys {
   548  			spec.Tags = append(spec.Tags, &ec2.Tag{
   549  				Key:   aws.String(key),
   550  				Value: aws.String(i.Tags[key]),
   551  			})
   552  		}
   553  
   554  		input.TagSpecifications = append(input.TagSpecifications, spec)
   555  	}
   556  
   557  	input.InstanceMarketOptions = getInstanceMarketOptionsRequest(i.SpotMarketOptions)
   558  
   559  	if i.Tenancy != "" {
   560  		input.Placement = &ec2.Placement{
   561  			Tenancy: &i.Tenancy,
   562  		}
   563  	}
   564  
   565  	out, err := s.EC2Client.RunInstances(input)
   566  	if err != nil {
   567  		return nil, errors.Wrap(err, "failed to run instance")
   568  	}
   569  
   570  	if len(out.Instances) == 0 {
   571  		return nil, errors.Errorf("no instance returned for reservation %v", out.GoString())
   572  	}
   573  
   574  	waitTimeout := 1 * time.Minute
   575  	s.scope.V(2).Info("Waiting for instance to be in running state", "instance-id", *out.Instances[0].InstanceId, "timeout", waitTimeout.String())
   576  	ctx, cancel := context.WithTimeout(aws.BackgroundContext(), waitTimeout)
   577  	defer cancel()
   578  
   579  	if err := s.EC2Client.WaitUntilInstanceRunningWithContext(
   580  		ctx,
   581  		&ec2.DescribeInstancesInput{InstanceIds: []*string{out.Instances[0].InstanceId}},
   582  		request.WithWaiterLogger(awslogs.NewWrapLogr(s.scope)),
   583  	); err != nil {
   584  		s.scope.V(2).Info("Could not determine if Machine is running. Machine state might be unavailable until next renconciliation.")
   585  	}
   586  
   587  	return s.SDKToInstance(out.Instances[0])
   588  }
   589  
   590  func volumeToBlockDeviceMapping(v *infrav1.Volume) *ec2.BlockDeviceMapping {
   591  	ebsDevice := &ec2.EbsBlockDevice{
   592  		DeleteOnTermination: aws.Bool(true),
   593  		VolumeSize:          aws.Int64(v.Size),
   594  		Encrypted:           v.Encrypted,
   595  	}
   596  
   597  	if v.Throughput != nil {
   598  		ebsDevice.Throughput = v.Throughput
   599  	}
   600  
   601  	if v.IOPS != 0 {
   602  		ebsDevice.Iops = aws.Int64(v.IOPS)
   603  	}
   604  
   605  	if v.EncryptionKey != "" {
   606  		ebsDevice.Encrypted = aws.Bool(true)
   607  		ebsDevice.KmsKeyId = aws.String(v.EncryptionKey)
   608  	}
   609  
   610  	if v.Type != "" {
   611  		ebsDevice.VolumeType = aws.String(string(v.Type))
   612  	}
   613  
   614  	return &ec2.BlockDeviceMapping{
   615  		DeviceName: &v.DeviceName,
   616  		Ebs:        ebsDevice,
   617  	}
   618  }
   619  
   620  // GetInstanceSecurityGroups returns a map from ENI id to the security groups applied to that ENI
   621  // While some security group operations take place at the "instance" level, these are in fact an API convenience for manipulating the first ("primary") ENI's properties.
   622  func (s *Service) GetInstanceSecurityGroups(instanceID string) (map[string][]string, error) {
   623  	enis, err := s.getInstanceENIs(instanceID)
   624  	if err != nil {
   625  		return nil, errors.Wrapf(err, "failed to get ENIs for instance %q", instanceID)
   626  	}
   627  
   628  	out := make(map[string][]string)
   629  	for _, eni := range enis {
   630  		var groups []string
   631  		for _, group := range eni.Groups {
   632  			groups = append(groups, aws.StringValue(group.GroupId))
   633  		}
   634  		out[aws.StringValue(eni.NetworkInterfaceId)] = groups
   635  	}
   636  	return out, nil
   637  }
   638  
   639  // UpdateInstanceSecurityGroups modifies the security groups of the given
   640  // EC2 instance.
   641  func (s *Service) UpdateInstanceSecurityGroups(instanceID string, ids []string) error {
   642  	s.scope.V(2).Info("Attempting to update security groups on instance", "instance-id", instanceID)
   643  
   644  	enis, err := s.getInstanceENIs(instanceID)
   645  	if err != nil {
   646  		return errors.Wrapf(err, "failed to get ENIs for instance %q", instanceID)
   647  	}
   648  
   649  	s.scope.V(3).Info("Found ENIs on instance", "number-of-enis", len(enis), "instance-id", instanceID)
   650  
   651  	for _, eni := range enis {
   652  		if err := s.attachSecurityGroupsToNetworkInterface(ids, aws.StringValue(eni.NetworkInterfaceId)); err != nil {
   653  			return errors.Wrapf(err, "failed to modify network interfaces on instance %q", instanceID)
   654  		}
   655  	}
   656  
   657  	return nil
   658  }
   659  
   660  // UpdateResourceTags updates the tags for an instance.
   661  // This will be called if there is anything to create (update) or delete.
   662  // We may not always have to perform each action, so we check what we're
   663  // receiving to avoid calling AWS if we don't need to.
   664  func (s *Service) UpdateResourceTags(resourceID *string, create, remove map[string]string) error {
   665  	s.scope.V(2).Info("Attempting to update tags on resource", "resource-id", *resourceID)
   666  
   667  	// If we have anything to create or update
   668  	if len(create) > 0 {
   669  		s.scope.V(2).Info("Attempting to create tags on resource", "resource-id", *resourceID)
   670  
   671  		// Convert our create map into an array of *ec2.Tag
   672  		createTagsInput := converters.MapToTags(create)
   673  
   674  		// Create the CreateTags input.
   675  		input := &ec2.CreateTagsInput{
   676  			Resources: []*string{resourceID},
   677  			Tags:      createTagsInput,
   678  		}
   679  
   680  		// Create/Update tags in AWS.
   681  		if _, err := s.EC2Client.CreateTags(input); err != nil {
   682  			return errors.Wrapf(err, "failed to create tags for resource %q: %+v", *resourceID, create)
   683  		}
   684  	}
   685  
   686  	// If we have anything to remove
   687  	if len(remove) > 0 {
   688  		s.scope.V(2).Info("Attempting to delete tags on resource", "resource-id", *resourceID)
   689  
   690  		// Convert our remove map into an array of *ec2.Tag
   691  		removeTagsInput := converters.MapToTags(remove)
   692  
   693  		// Create the DeleteTags input
   694  		input := &ec2.DeleteTagsInput{
   695  			Resources: []*string{resourceID},
   696  			Tags:      removeTagsInput,
   697  		}
   698  
   699  		// Delete tags in AWS.
   700  		if _, err := s.EC2Client.DeleteTags(input); err != nil {
   701  			return errors.Wrapf(err, "failed to delete tags for resource %q: %v", *resourceID, remove)
   702  		}
   703  	}
   704  
   705  	return nil
   706  }
   707  
   708  func (s *Service) getInstanceENIs(instanceID string) ([]*ec2.NetworkInterface, error) {
   709  	input := &ec2.DescribeNetworkInterfacesInput{
   710  		Filters: []*ec2.Filter{
   711  			{
   712  				Name:   aws.String("attachment.instance-id"),
   713  				Values: []*string{aws.String(instanceID)},
   714  			},
   715  		},
   716  	}
   717  
   718  	output, err := s.EC2Client.DescribeNetworkInterfaces(input)
   719  	if err != nil {
   720  		return nil, err
   721  	}
   722  
   723  	return output.NetworkInterfaces, nil
   724  }
   725  
   726  func (s *Service) getImageRootDevice(imageID string) (*string, error) {
   727  	input := &ec2.DescribeImagesInput{
   728  		ImageIds: []*string{aws.String(imageID)},
   729  	}
   730  
   731  	output, err := s.EC2Client.DescribeImages(input)
   732  	if err != nil {
   733  		return nil, err
   734  	}
   735  
   736  	if len(output.Images) == 0 {
   737  		return nil, errors.Errorf("no images returned when looking up ID %q", imageID)
   738  	}
   739  
   740  	return output.Images[0].RootDeviceName, nil
   741  }
   742  
   743  func (s *Service) getImageSnapshotSize(imageID string) (*int64, error) {
   744  	input := &ec2.DescribeImagesInput{
   745  		ImageIds: []*string{aws.String(imageID)},
   746  	}
   747  
   748  	output, err := s.EC2Client.DescribeImages(input)
   749  	if err != nil {
   750  		return nil, err
   751  	}
   752  
   753  	if len(output.Images) == 0 {
   754  		return nil, errors.Errorf("no images returned when looking up ID %q", imageID)
   755  	}
   756  
   757  	if len(output.Images[0].BlockDeviceMappings) == 0 {
   758  		return nil, errors.Errorf("no block device mappings returned when looking up ID %q", imageID)
   759  	}
   760  
   761  	if output.Images[0].BlockDeviceMappings[0].Ebs == nil {
   762  		return nil, errors.Errorf("no EBS returned when looking up ID %q", imageID)
   763  	}
   764  
   765  	if output.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize == nil {
   766  		return nil, errors.Errorf("no EBS volume size returned when looking up ID %q", imageID)
   767  	}
   768  
   769  	return output.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize, nil
   770  }
   771  
   772  // SDKToInstance converts an AWS EC2 SDK instance to the CAPA instance type.
   773  // SDKToInstance populates all instance fields except for rootVolumeSize,
   774  // because EC2.DescribeInstances does not return the size of storage devices. An
   775  // additional call to EC2 is required to get this value.
   776  func (s *Service) SDKToInstance(v *ec2.Instance) (*infrav1.Instance, error) {
   777  	i := &infrav1.Instance{
   778  		ID:           aws.StringValue(v.InstanceId),
   779  		State:        infrav1.InstanceState(*v.State.Name),
   780  		Type:         aws.StringValue(v.InstanceType),
   781  		SubnetID:     aws.StringValue(v.SubnetId),
   782  		ImageID:      aws.StringValue(v.ImageId),
   783  		SSHKeyName:   v.KeyName,
   784  		PrivateIP:    v.PrivateIpAddress,
   785  		PublicIP:     v.PublicIpAddress,
   786  		ENASupport:   v.EnaSupport,
   787  		EBSOptimized: v.EbsOptimized,
   788  	}
   789  
   790  	// Extract IAM Instance Profile name from ARN
   791  	// TODO: Handle this comparison more safely, perhaps by querying IAM for the
   792  	// instance profile ARN and comparing to the ARN returned by EC2
   793  	if v.IamInstanceProfile != nil && v.IamInstanceProfile.Arn != nil {
   794  		split := strings.Split(aws.StringValue(v.IamInstanceProfile.Arn), "instance-profile/")
   795  		if len(split) > 1 && split[1] != "" {
   796  			i.IAMProfile = split[1]
   797  		}
   798  	}
   799  
   800  	for _, sg := range v.SecurityGroups {
   801  		i.SecurityGroupIDs = append(i.SecurityGroupIDs, *sg.GroupId)
   802  	}
   803  
   804  	if len(v.Tags) > 0 {
   805  		i.Tags = converters.TagsToMap(v.Tags)
   806  	}
   807  
   808  	i.Addresses = s.getInstanceAddresses(v)
   809  
   810  	i.AvailabilityZone = aws.StringValue(v.Placement.AvailabilityZone)
   811  
   812  	for _, volume := range v.BlockDeviceMappings {
   813  		i.VolumeIDs = append(i.VolumeIDs, *volume.Ebs.VolumeId)
   814  	}
   815  
   816  	return i, nil
   817  }
   818  
   819  func (s *Service) getInstanceAddresses(instance *ec2.Instance) []clusterv1.MachineAddress {
   820  	addresses := []clusterv1.MachineAddress{}
   821  	for _, eni := range instance.NetworkInterfaces {
   822  		privateDNSAddress := clusterv1.MachineAddress{
   823  			Type:    clusterv1.MachineInternalDNS,
   824  			Address: aws.StringValue(eni.PrivateDnsName),
   825  		}
   826  		privateIPAddress := clusterv1.MachineAddress{
   827  			Type:    clusterv1.MachineInternalIP,
   828  			Address: aws.StringValue(eni.PrivateIpAddress),
   829  		}
   830  		addresses = append(addresses, privateDNSAddress, privateIPAddress)
   831  
   832  		// An elastic IP is attached if association is non nil pointer
   833  		if eni.Association != nil {
   834  			publicDNSAddress := clusterv1.MachineAddress{
   835  				Type:    clusterv1.MachineExternalDNS,
   836  				Address: aws.StringValue(eni.Association.PublicDnsName),
   837  			}
   838  			publicIPAddress := clusterv1.MachineAddress{
   839  				Type:    clusterv1.MachineExternalIP,
   840  				Address: aws.StringValue(eni.Association.PublicIp),
   841  			}
   842  			addresses = append(addresses, publicDNSAddress, publicIPAddress)
   843  		}
   844  	}
   845  	return addresses
   846  }
   847  
   848  func (s *Service) getNetworkInterfaceSecurityGroups(interfaceID string) ([]string, error) {
   849  	input := &ec2.DescribeNetworkInterfaceAttributeInput{
   850  		Attribute:          aws.String("groupSet"),
   851  		NetworkInterfaceId: aws.String(interfaceID),
   852  	}
   853  
   854  	output, err := s.EC2Client.DescribeNetworkInterfaceAttribute(input)
   855  	if err != nil {
   856  		return nil, err
   857  	}
   858  
   859  	groups := make([]string, len(output.Groups))
   860  	for i := range output.Groups {
   861  		groups[i] = aws.StringValue(output.Groups[i].GroupId)
   862  	}
   863  
   864  	return groups, nil
   865  }
   866  
   867  func (s *Service) attachSecurityGroupsToNetworkInterface(groups []string, interfaceID string) error {
   868  	existingGroups, err := s.getNetworkInterfaceSecurityGroups(interfaceID)
   869  	if err != nil {
   870  		return errors.Wrapf(err, "failed to look up network interface security groups: %+v", err)
   871  	}
   872  
   873  	totalGroups := make([]string, len(existingGroups))
   874  	copy(totalGroups, existingGroups)
   875  
   876  	for _, group := range groups {
   877  		if !containsGroup(existingGroups, group) {
   878  			totalGroups = append(totalGroups, group)
   879  		}
   880  	}
   881  
   882  	// no new groups to attach
   883  	if len(existingGroups) == len(totalGroups) {
   884  		return nil
   885  	}
   886  
   887  	s.scope.Info("Updating security groups", "groups", totalGroups)
   888  
   889  	input := &ec2.ModifyNetworkInterfaceAttributeInput{
   890  		NetworkInterfaceId: aws.String(interfaceID),
   891  		Groups:             aws.StringSlice(totalGroups),
   892  	}
   893  
   894  	if _, err := s.EC2Client.ModifyNetworkInterfaceAttribute(input); err != nil {
   895  		return errors.Wrapf(err, "failed to modify interface %q to have security groups %v", interfaceID, totalGroups)
   896  	}
   897  	return nil
   898  }
   899  
   900  // DetachSecurityGroupsFromNetworkInterface looks up an ENI by interfaceID and
   901  // detaches a list of Security Groups from that ENI.
   902  func (s *Service) DetachSecurityGroupsFromNetworkInterface(groups []string, interfaceID string) error {
   903  	existingGroups, err := s.getNetworkInterfaceSecurityGroups(interfaceID)
   904  	if err != nil {
   905  		return errors.Wrapf(err, "failed to look up network interface security groups")
   906  	}
   907  
   908  	remainingGroups := existingGroups
   909  	for _, group := range groups {
   910  		remainingGroups = filterGroups(remainingGroups, group)
   911  	}
   912  
   913  	input := &ec2.ModifyNetworkInterfaceAttributeInput{
   914  		NetworkInterfaceId: aws.String(interfaceID),
   915  		Groups:             aws.StringSlice(remainingGroups),
   916  	}
   917  
   918  	if _, err := s.EC2Client.ModifyNetworkInterfaceAttribute(input); err != nil {
   919  		return errors.Wrapf(err, "failed to modify interface %q", interfaceID)
   920  	}
   921  	return nil
   922  }
   923  
   924  // checkRootVolume checks the input root volume options against the requested AMI's defaults
   925  // and returns the AMI's root device name.
   926  func (s *Service) checkRootVolume(rootVolume *infrav1.Volume, imageID string) (*string, error) {
   927  	rootDeviceName, err := s.getImageRootDevice(imageID)
   928  	if err != nil {
   929  		return nil, errors.Wrapf(err, "failed to get root volume from image %q", imageID)
   930  	}
   931  
   932  	snapshotSize, err := s.getImageSnapshotSize(imageID)
   933  	if err != nil {
   934  		return nil, errors.Wrapf(err, "failed to get root volume from image %q", imageID)
   935  	}
   936  
   937  	if rootVolume.Size < *snapshotSize {
   938  		return nil, errors.Errorf("root volume size (%d) must be greater than or equal to snapshot size (%d)", rootVolume.Size, *snapshotSize)
   939  	}
   940  
   941  	return rootDeviceName, nil
   942  }
   943  
   944  // filterGroups filters a list for a string.
   945  func filterGroups(list []string, strToFilter string) (newList []string) {
   946  	for _, item := range list {
   947  		if item != strToFilter {
   948  			newList = append(newList, item)
   949  		}
   950  	}
   951  	return
   952  }
   953  
   954  // containsGroup returns true if a list contains a string.
   955  func containsGroup(list []string, strToSearch string) bool {
   956  	for _, item := range list {
   957  		if item == strToSearch {
   958  			return true
   959  		}
   960  	}
   961  	return false
   962  }
   963  
   964  func getInstanceMarketOptionsRequest(spotMarketOptions *infrav1.SpotMarketOptions) *ec2.InstanceMarketOptionsRequest {
   965  	if spotMarketOptions == nil {
   966  		// Instance is not a Spot instance
   967  		return nil
   968  	}
   969  
   970  	// Set required values for Spot instances
   971  	spotOptions := &ec2.SpotMarketOptions{}
   972  
   973  	// The following two options ensure that:
   974  	// - If an instance is interrupted, it is terminated rather than hibernating or stopping
   975  	// - No replacement instance will be created if the instance is interrupted
   976  	// - If the spot request cannot immediately be fulfilled, it will not be created
   977  	// This behaviour should satisfy the 1:1 mapping of Machines to Instances as
   978  	// assumed by the Cluster API.
   979  	spotOptions.SetInstanceInterruptionBehavior(ec2.InstanceInterruptionBehaviorTerminate)
   980  	spotOptions.SetSpotInstanceType(ec2.SpotInstanceTypeOneTime)
   981  
   982  	maxPrice := spotMarketOptions.MaxPrice
   983  	if maxPrice != nil && *maxPrice != "" {
   984  		spotOptions.SetMaxPrice(*maxPrice)
   985  	}
   986  
   987  	instanceMarketOptionsRequest := &ec2.InstanceMarketOptionsRequest{}
   988  	instanceMarketOptionsRequest.SetMarketType(ec2.MarketTypeSpot)
   989  	instanceMarketOptionsRequest.SetSpotOptions(spotOptions)
   990  
   991  	return instanceMarketOptionsRequest
   992  }