sigs.k8s.io/cluster-api-provider-aws@v1.5.5/pkg/cloud/services/ec2/instances.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package ec2 18 19 import ( 20 "context" 21 "encoding/base64" 22 "fmt" 23 "sort" 24 "strings" 25 "time" 26 27 "github.com/aws/aws-sdk-go/aws" 28 "github.com/aws/aws-sdk-go/aws/request" 29 "github.com/aws/aws-sdk-go/service/ec2" 30 "github.com/pkg/errors" 31 "k8s.io/utils/pointer" 32 33 infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1" 34 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/awserrors" 35 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/converters" 36 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/filter" 37 awslogs "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/logs" 38 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope" 39 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/userdata" 40 "sigs.k8s.io/cluster-api-provider-aws/pkg/record" 41 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" 42 capierrors "sigs.k8s.io/cluster-api/errors" 43 ) 44 45 // GetRunningInstanceByTags returns the existing instance or nothing if it doesn't exist. 46 func (s *Service) GetRunningInstanceByTags(scope *scope.MachineScope) (*infrav1.Instance, error) { 47 s.scope.V(2).Info("Looking for existing machine instance by tags") 48 49 input := &ec2.DescribeInstancesInput{ 50 Filters: []*ec2.Filter{ 51 filter.EC2.VPC(s.scope.VPC().ID), 52 filter.EC2.ClusterOwned(s.scope.Name()), 53 filter.EC2.Name(scope.Name()), 54 filter.EC2.InstanceStates(ec2.InstanceStateNamePending, ec2.InstanceStateNameRunning), 55 }, 56 } 57 58 out, err := s.EC2Client.DescribeInstances(input) 59 switch { 60 case awserrors.IsNotFound(err): 61 return nil, nil 62 case err != nil: 63 record.Eventf(s.scope.InfraCluster(), "FailedDescribeInstances", "Failed to describe instances by tags: %v", err) 64 return nil, errors.Wrap(err, "failed to describe instances by tags") 65 } 66 67 // TODO: currently just returns the first matched instance, need to 68 // better rationalize how to find the right instance to return if multiple 69 // match 70 for _, res := range out.Reservations { 71 for _, inst := range res.Instances { 72 return s.SDKToInstance(inst) 73 } 74 } 75 76 return nil, nil 77 } 78 79 // InstanceIfExists returns the existing instance by id and errors if it cannot find the instance(ErrInstanceNotFoundByID) or API call fails (ErrDescribeInstance). 80 // Returns empty instance with nil error, only when providerID is nil. 81 func (s *Service) InstanceIfExists(id *string) (*infrav1.Instance, error) { 82 if id == nil { 83 s.scope.Info("Instance does not have an instance id") 84 return nil, nil 85 } 86 87 s.scope.V(2).Info("Looking for instance by id", "instance-id", *id) 88 89 input := &ec2.DescribeInstancesInput{ 90 InstanceIds: []*string{id}, 91 } 92 93 out, err := s.EC2Client.DescribeInstances(input) 94 switch { 95 case awserrors.IsNotFound(err): 96 record.Eventf(s.scope.InfraCluster(), "FailedFindInstances", "failed to find instance by providerId %q: %v", *id, err) 97 return nil, ErrInstanceNotFoundByID 98 case err != nil: 99 record.Eventf(s.scope.InfraCluster(), "FailedDescribeInstances", "failed to describe instance %q: %v", *id, err) 100 return nil, ErrDescribeInstance 101 } 102 103 if len(out.Reservations) > 0 && len(out.Reservations[0].Instances) > 0 { 104 return s.SDKToInstance(out.Reservations[0].Instances[0]) 105 } else { 106 // Failed to find instance with provider id. 107 record.Eventf(s.scope.InfraCluster(), "FailedFindInstances", "failed to find instance by providerId %q: %v", *id, err) 108 return nil, ErrInstanceNotFoundByID 109 } 110 } 111 112 // CreateInstance runs an ec2 instance. 113 func (s *Service) CreateInstance(scope *scope.MachineScope, userData []byte, userDataFormat string) (*infrav1.Instance, error) { 114 s.scope.V(2).Info("Creating an instance for a machine") 115 116 input := &infrav1.Instance{ 117 Type: scope.AWSMachine.Spec.InstanceType, 118 IAMProfile: scope.AWSMachine.Spec.IAMInstanceProfile, 119 RootVolume: scope.AWSMachine.Spec.RootVolume.DeepCopy(), 120 NonRootVolumes: scope.AWSMachine.Spec.NonRootVolumes, 121 NetworkInterfaces: scope.AWSMachine.Spec.NetworkInterfaces, 122 } 123 124 // Make sure to use the MachineScope here to get the merger of AWSCluster and AWSMachine tags 125 additionalTags := scope.AdditionalTags() 126 input.Tags = infrav1.Build(infrav1.BuildParams{ 127 ClusterName: s.scope.KubernetesClusterName(), 128 Lifecycle: infrav1.ResourceLifecycleOwned, 129 Name: aws.String(scope.Name()), 130 Role: aws.String(scope.Role()), 131 Additional: additionalTags, 132 }.WithCloudProvider(s.scope.KubernetesClusterName()).WithMachineName(scope.Machine)) 133 134 var err error 135 // Pick image from the machine configuration, or use a default one. 136 if scope.AWSMachine.Spec.AMI.ID != nil { // nolint:nestif 137 input.ImageID = *scope.AWSMachine.Spec.AMI.ID 138 } else { 139 if scope.Machine.Spec.Version == nil { 140 err := errors.New("Either AWSMachine's spec.ami.id or Machine's spec.version must be defined") 141 scope.SetFailureReason(capierrors.CreateMachineError) 142 scope.SetFailureMessage(err) 143 return nil, err 144 } 145 146 imageLookupFormat := scope.AWSMachine.Spec.ImageLookupFormat 147 if imageLookupFormat == "" { 148 imageLookupFormat = scope.InfraCluster.ImageLookupFormat() 149 } 150 151 imageLookupOrg := scope.AWSMachine.Spec.ImageLookupOrg 152 if imageLookupOrg == "" { 153 imageLookupOrg = scope.InfraCluster.ImageLookupOrg() 154 } 155 156 imageLookupBaseOS := scope.AWSMachine.Spec.ImageLookupBaseOS 157 if imageLookupBaseOS == "" { 158 imageLookupBaseOS = scope.InfraCluster.ImageLookupBaseOS() 159 } 160 161 if scope.IsEKSManaged() && imageLookupFormat == "" && imageLookupOrg == "" && imageLookupBaseOS == "" { 162 input.ImageID, err = s.eksAMILookup(*scope.Machine.Spec.Version, scope.AWSMachine.Spec.AMI.EKSOptimizedLookupType) 163 if err != nil { 164 return nil, err 165 } 166 } else { 167 input.ImageID, err = s.defaultAMIIDLookup(imageLookupFormat, imageLookupOrg, imageLookupBaseOS, *scope.Machine.Spec.Version) 168 if err != nil { 169 return nil, err 170 } 171 } 172 } 173 174 subnetID, err := s.findSubnet(scope) 175 if err != nil { 176 return nil, err 177 } 178 input.SubnetID = subnetID 179 180 if !scope.IsExternallyManaged() && !scope.IsEKSManaged() && s.scope.Network().APIServerELB.DNSName == "" { 181 record.Eventf(s.scope.InfraCluster(), "FailedCreateInstance", "Failed to run controlplane, APIServer ELB not available") 182 183 return nil, awserrors.NewFailedDependency("failed to run controlplane, APIServer ELB not available") 184 } 185 186 if scope.CompressUserData(userDataFormat) { 187 userData, err = userdata.GzipBytes(userData) 188 if err != nil { 189 return nil, errors.New("failed to gzip userdata") 190 } 191 } 192 193 input.UserData = pointer.StringPtr(base64.StdEncoding.EncodeToString(userData)) 194 195 // Set security groups. 196 ids, err := s.GetCoreSecurityGroups(scope) 197 if err != nil { 198 return nil, err 199 } 200 input.SecurityGroupIDs = append(input.SecurityGroupIDs, ids...) 201 202 // If SSHKeyName WAS NOT provided in the AWSMachine Spec, fallback to the value provided in the AWSCluster Spec. 203 // If a value was not provided in the AWSCluster Spec, then use the defaultSSHKeyName 204 // Note that: 205 // - a nil AWSMachine.Spec.SSHKeyName value means use the AWSCluster.Spec.SSHKeyName SSH key name value 206 // - nil values for both AWSCluster.Spec.SSHKeyName and AWSMachine.Spec.SSHKeyName means use the default SSH key name value 207 // - an empty string means do not set an SSH key name at all 208 // - otherwise use the value specified in either AWSMachine or AWSCluster 209 var prioritizedSSHKeyName string 210 switch { 211 case scope.AWSMachine.Spec.SSHKeyName != nil: 212 // prefer AWSMachine.Spec.SSHKeyName if it is defined 213 prioritizedSSHKeyName = *scope.AWSMachine.Spec.SSHKeyName 214 case scope.InfraCluster.SSHKeyName() != nil: 215 // fallback to AWSCluster.Spec.SSHKeyName if it is defined 216 prioritizedSSHKeyName = *scope.InfraCluster.SSHKeyName() 217 default: 218 if !scope.IsExternallyManaged() { 219 prioritizedSSHKeyName = defaultSSHKeyName 220 } 221 } 222 223 // Only set input.SSHKeyName if the user did not explicitly request no ssh key be set (explicitly setting "" on either the Machine or related Cluster) 224 if prioritizedSSHKeyName != "" { 225 input.SSHKeyName = aws.String(prioritizedSSHKeyName) 226 } 227 228 input.SpotMarketOptions = scope.AWSMachine.Spec.SpotMarketOptions 229 230 input.Tenancy = scope.AWSMachine.Spec.Tenancy 231 232 s.scope.V(2).Info("Running instance", "machine-role", scope.Role()) 233 out, err := s.runInstance(scope.Role(), input) 234 if err != nil { 235 // Only record the failure event if the error is not related to failed dependencies. 236 // This is to avoid spamming failure events since the machine will be requeued by the actuator. 237 if !awserrors.IsFailedDependency(errors.Cause(err)) { 238 record.Warnf(scope.AWSMachine, "FailedCreate", "Failed to create instance: %v", err) 239 } 240 return nil, err 241 } 242 243 if len(input.NetworkInterfaces) > 0 { 244 for _, id := range input.NetworkInterfaces { 245 s.scope.V(2).Info("Attaching security groups to provided network interface", "groups", input.SecurityGroupIDs, "interface", id) 246 if err := s.attachSecurityGroupsToNetworkInterface(input.SecurityGroupIDs, id); err != nil { 247 return nil, err 248 } 249 } 250 } 251 252 record.Eventf(scope.AWSMachine, "SuccessfulCreate", "Created new %s instance with id %q", scope.Role(), out.ID) 253 return out, nil 254 } 255 256 // findSubnet attempts to retrieve a subnet ID in the following order: 257 // - subnetID specified in machine configuration, 258 // - subnet based on filters in machine configuration 259 // - subnet based on the availability zone specified, 260 // - default to the first private subnet available. 261 func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { 262 // Check Machine.Spec.FailureDomain first as it's used by KubeadmControlPlane to spread machines across failure domains. 263 failureDomain := scope.Machine.Spec.FailureDomain 264 if failureDomain == nil { 265 failureDomain = scope.AWSMachine.Spec.FailureDomain 266 } 267 268 // We basically have 2 sources for subnets: 269 // 1. If subnet.id or subnet.filters are specified, we directly query AWS 270 // 2. All other cases use the subnets provided in the cluster network spec without ever calling AWS 271 272 switch { 273 case scope.AWSMachine.Spec.Subnet != nil && (scope.AWSMachine.Spec.Subnet.ID != nil || scope.AWSMachine.Spec.Subnet.Filters != nil): 274 criteria := []*ec2.Filter{ 275 filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable), 276 } 277 if !scope.IsExternallyManaged() { 278 criteria = append(criteria, filter.EC2.VPC(s.scope.VPC().ID)) 279 } 280 if scope.AWSMachine.Spec.Subnet.ID != nil { 281 criteria = append(criteria, &ec2.Filter{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{*scope.AWSMachine.Spec.Subnet.ID})}) 282 } 283 for _, f := range scope.AWSMachine.Spec.Subnet.Filters { 284 criteria = append(criteria, &ec2.Filter{Name: aws.String(f.Name), Values: aws.StringSlice(f.Values)}) 285 } 286 287 subnets, err := s.getFilteredSubnets(criteria...) 288 if err != nil { 289 return "", errors.Wrapf(err, "failed to filter subnets for criteria %q", criteria) 290 } 291 if len(subnets) == 0 { 292 errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching criteria %q", 293 scope.Name(), criteria) 294 record.Warnf(scope.AWSMachine, "FailedCreate", errMessage) 295 return "", awserrors.NewFailedDependency(errMessage) 296 } 297 298 var filtered []*ec2.Subnet 299 var errMessage string 300 for _, subnet := range subnets { 301 if failureDomain != nil && *subnet.AvailabilityZone != *failureDomain { 302 // we could have included the failure domain in the query criteria, but then we end up with EC2 error 303 // messages that don't give a good hint about what is really wrong 304 errMessage += fmt.Sprintf(" subnet %q availability zone %q does not match failure domain %q.", 305 *subnet.SubnetId, *subnet.AvailabilityZone, *failureDomain) 306 continue 307 } 308 if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP && !*subnet.MapPublicIpOnLaunch { 309 errMessage += fmt.Sprintf(" subnet %q is a private subnet.", *subnet.SubnetId) 310 continue 311 } 312 filtered = append(filtered, subnet) 313 } 314 if len(filtered) == 0 { 315 errMessage = fmt.Sprintf("failed to run machine %q, found %d subnets matching criteria but post-filtering failed.", 316 scope.Name(), len(subnets)) + errMessage 317 record.Warnf(scope.AWSMachine, "FailedCreate", errMessage) 318 return "", awserrors.NewFailedDependency(errMessage) 319 } 320 return *filtered[0].SubnetId, nil 321 case failureDomain != nil: 322 if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP { 323 subnets := s.scope.Subnets().FilterPublic().FilterByZone(*failureDomain) 324 if len(subnets) == 0 { 325 errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available in availability zone %q", 326 scope.Name(), *failureDomain) 327 record.Warnf(scope.AWSMachine, "FailedCreate", errMessage) 328 return "", awserrors.NewFailedDependency(errMessage) 329 } 330 return subnets[0].ID, nil 331 } 332 333 subnets := s.scope.Subnets().FilterPrivate().FilterByZone(*failureDomain) 334 if len(subnets) == 0 { 335 errMessage := fmt.Sprintf("failed to run machine %q, no subnets available in availability zone %q", 336 scope.Name(), *failureDomain) 337 record.Warnf(scope.AWSMachine, "FailedCreate", errMessage) 338 return "", awserrors.NewFailedDependency(errMessage) 339 } 340 return subnets[0].ID, nil 341 case scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP: 342 subnets := s.scope.Subnets().FilterPublic() 343 if len(subnets) == 0 { 344 errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available", scope.Name()) 345 record.Eventf(scope.AWSMachine, "FailedCreate", errMessage) 346 return "", awserrors.NewFailedDependency(errMessage) 347 } 348 return subnets[0].ID, nil 349 350 // TODO(vincepri): Define a tag that would allow to pick a preferred subnet in an AZ when working 351 // with control plane machines. 352 353 default: 354 sns := s.scope.Subnets().FilterPrivate() 355 if len(sns) == 0 { 356 errMessage := fmt.Sprintf("failed to run machine %q, no subnets available", scope.Name()) 357 record.Eventf(s.scope.InfraCluster(), "FailedCreateInstance", errMessage) 358 return "", awserrors.NewFailedDependency(errMessage) 359 } 360 return sns[0].ID, nil 361 } 362 } 363 364 // getFilteredSubnets fetches subnets filtered based on the criteria passed. 365 func (s *Service) getFilteredSubnets(criteria ...*ec2.Filter) ([]*ec2.Subnet, error) { 366 out, err := s.EC2Client.DescribeSubnets(&ec2.DescribeSubnetsInput{Filters: criteria}) 367 if err != nil { 368 return nil, err 369 } 370 return out.Subnets, nil 371 } 372 373 // GetCoreSecurityGroups looks up the security group IDs managed by this actuator 374 // They are considered "core" to its proper functioning. 375 func (s *Service) GetCoreSecurityGroups(scope *scope.MachineScope) ([]string, error) { 376 if scope.IsExternallyManaged() { 377 return nil, nil 378 } 379 380 // These are common across both controlplane and node machines 381 sgRoles := []infrav1.SecurityGroupRole{ 382 infrav1.SecurityGroupNode, 383 } 384 385 if !scope.IsEKSManaged() { 386 sgRoles = append(sgRoles, infrav1.SecurityGroupLB) 387 } 388 389 switch scope.Role() { 390 case "node": 391 // Just the common security groups above 392 if scope.IsEKSManaged() { 393 sgRoles = append(sgRoles, infrav1.SecurityGroupEKSNodeAdditional) 394 } 395 case "control-plane": 396 sgRoles = append(sgRoles, infrav1.SecurityGroupControlPlane) 397 default: 398 return nil, errors.Errorf("Unknown node role %q", scope.Role()) 399 } 400 ids := make([]string, 0, len(sgRoles)) 401 for _, sg := range sgRoles { 402 if _, ok := s.scope.SecurityGroups()[sg]; !ok { 403 return nil, awserrors.NewFailedDependency(fmt.Sprintf("%s security group not available", sg)) 404 } 405 ids = append(ids, s.scope.SecurityGroups()[sg].ID) 406 } 407 return ids, nil 408 } 409 410 // GetCoreNodeSecurityGroups looks up the security group IDs managed by this actuator 411 // They are considered "core" to its proper functioning. 412 func (s *Service) GetCoreNodeSecurityGroups(scope *scope.MachinePoolScope) ([]string, error) { 413 // These are common across both controlplane and node machines 414 sgRoles := []infrav1.SecurityGroupRole{ 415 infrav1.SecurityGroupNode, 416 } 417 418 if !scope.IsEKSManaged() { 419 sgRoles = append(sgRoles, infrav1.SecurityGroupLB) 420 } else { 421 sgRoles = append(sgRoles, infrav1.SecurityGroupEKSNodeAdditional) 422 } 423 424 ids := make([]string, 0, len(sgRoles)) 425 for _, sg := range sgRoles { 426 if _, ok := s.scope.SecurityGroups()[sg]; !ok { 427 return nil, awserrors.NewFailedDependency( 428 fmt.Sprintf("%s security group not available", sg), 429 ) 430 } 431 ids = append(ids, s.scope.SecurityGroups()[sg].ID) 432 } 433 return ids, nil 434 } 435 436 // TerminateInstance terminates an EC2 instance. 437 // Returns nil on success, error in all other cases. 438 func (s *Service) TerminateInstance(instanceID string) error { 439 s.scope.V(2).Info("Attempting to terminate instance", "instance-id", instanceID) 440 441 input := &ec2.TerminateInstancesInput{ 442 InstanceIds: aws.StringSlice([]string{instanceID}), 443 } 444 445 if _, err := s.EC2Client.TerminateInstances(input); err != nil { 446 return errors.Wrapf(err, "failed to terminate instance with id %q", instanceID) 447 } 448 449 s.scope.V(2).Info("Terminated instance", "instance-id", instanceID) 450 return nil 451 } 452 453 // TerminateInstanceAndWait terminates and waits 454 // for an EC2 instance to terminate. 455 func (s *Service) TerminateInstanceAndWait(instanceID string) error { 456 if err := s.TerminateInstance(instanceID); err != nil { 457 return err 458 } 459 460 s.scope.V(2).Info("Waiting for EC2 instance to terminate", "instance-id", instanceID) 461 462 input := &ec2.DescribeInstancesInput{ 463 InstanceIds: aws.StringSlice([]string{instanceID}), 464 } 465 466 if err := s.EC2Client.WaitUntilInstanceTerminated(input); err != nil { 467 return errors.Wrapf(err, "failed to wait for instance %q termination", instanceID) 468 } 469 470 return nil 471 } 472 473 func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instance, error) { 474 input := &ec2.RunInstancesInput{ 475 InstanceType: aws.String(i.Type), 476 ImageId: aws.String(i.ImageID), 477 KeyName: i.SSHKeyName, 478 EbsOptimized: i.EBSOptimized, 479 MaxCount: aws.Int64(1), 480 MinCount: aws.Int64(1), 481 UserData: i.UserData, 482 } 483 484 s.scope.V(2).Info("userData size", "bytes", len(*i.UserData), "role", role) 485 486 if len(i.NetworkInterfaces) > 0 { 487 netInterfaces := make([]*ec2.InstanceNetworkInterfaceSpecification, 0, len(i.NetworkInterfaces)) 488 489 for index, id := range i.NetworkInterfaces { 490 netInterfaces = append(netInterfaces, &ec2.InstanceNetworkInterfaceSpecification{ 491 NetworkInterfaceId: aws.String(id), 492 DeviceIndex: aws.Int64(int64(index)), 493 }) 494 } 495 496 input.NetworkInterfaces = netInterfaces 497 } else { 498 input.SubnetId = aws.String(i.SubnetID) 499 500 if len(i.SecurityGroupIDs) > 0 { 501 input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs) 502 } 503 } 504 505 if i.IAMProfile != "" { 506 input.IamInstanceProfile = &ec2.IamInstanceProfileSpecification{ 507 Name: aws.String(i.IAMProfile), 508 } 509 } 510 511 blockdeviceMappings := []*ec2.BlockDeviceMapping{} 512 513 if i.RootVolume != nil { 514 rootDeviceName, err := s.checkRootVolume(i.RootVolume, i.ImageID) 515 if err != nil { 516 return nil, err 517 } 518 519 i.RootVolume.DeviceName = aws.StringValue(rootDeviceName) 520 blockDeviceMapping := volumeToBlockDeviceMapping(i.RootVolume) 521 blockdeviceMappings = append(blockdeviceMappings, blockDeviceMapping) 522 } 523 524 for vi := range i.NonRootVolumes { 525 nonRootVolume := i.NonRootVolumes[vi] 526 527 if nonRootVolume.DeviceName == "" { 528 return nil, errors.Errorf("non root volume should have device name specified") 529 } 530 531 blockDeviceMapping := volumeToBlockDeviceMapping(&nonRootVolume) 532 blockdeviceMappings = append(blockdeviceMappings, blockDeviceMapping) 533 } 534 535 if len(blockdeviceMappings) != 0 { 536 input.BlockDeviceMappings = blockdeviceMappings 537 } 538 539 if len(i.Tags) > 0 { 540 spec := &ec2.TagSpecification{ResourceType: aws.String(ec2.ResourceTypeInstance)} 541 // We need to sort keys for tests to work 542 keys := make([]string, 0, len(i.Tags)) 543 for k := range i.Tags { 544 keys = append(keys, k) 545 } 546 sort.Strings(keys) 547 for _, key := range keys { 548 spec.Tags = append(spec.Tags, &ec2.Tag{ 549 Key: aws.String(key), 550 Value: aws.String(i.Tags[key]), 551 }) 552 } 553 554 input.TagSpecifications = append(input.TagSpecifications, spec) 555 } 556 557 input.InstanceMarketOptions = getInstanceMarketOptionsRequest(i.SpotMarketOptions) 558 559 if i.Tenancy != "" { 560 input.Placement = &ec2.Placement{ 561 Tenancy: &i.Tenancy, 562 } 563 } 564 565 out, err := s.EC2Client.RunInstances(input) 566 if err != nil { 567 return nil, errors.Wrap(err, "failed to run instance") 568 } 569 570 if len(out.Instances) == 0 { 571 return nil, errors.Errorf("no instance returned for reservation %v", out.GoString()) 572 } 573 574 waitTimeout := 1 * time.Minute 575 s.scope.V(2).Info("Waiting for instance to be in running state", "instance-id", *out.Instances[0].InstanceId, "timeout", waitTimeout.String()) 576 ctx, cancel := context.WithTimeout(aws.BackgroundContext(), waitTimeout) 577 defer cancel() 578 579 if err := s.EC2Client.WaitUntilInstanceRunningWithContext( 580 ctx, 581 &ec2.DescribeInstancesInput{InstanceIds: []*string{out.Instances[0].InstanceId}}, 582 request.WithWaiterLogger(awslogs.NewWrapLogr(s.scope)), 583 ); err != nil { 584 s.scope.V(2).Info("Could not determine if Machine is running. Machine state might be unavailable until next renconciliation.") 585 } 586 587 return s.SDKToInstance(out.Instances[0]) 588 } 589 590 func volumeToBlockDeviceMapping(v *infrav1.Volume) *ec2.BlockDeviceMapping { 591 ebsDevice := &ec2.EbsBlockDevice{ 592 DeleteOnTermination: aws.Bool(true), 593 VolumeSize: aws.Int64(v.Size), 594 Encrypted: v.Encrypted, 595 } 596 597 if v.Throughput != nil { 598 ebsDevice.Throughput = v.Throughput 599 } 600 601 if v.IOPS != 0 { 602 ebsDevice.Iops = aws.Int64(v.IOPS) 603 } 604 605 if v.EncryptionKey != "" { 606 ebsDevice.Encrypted = aws.Bool(true) 607 ebsDevice.KmsKeyId = aws.String(v.EncryptionKey) 608 } 609 610 if v.Type != "" { 611 ebsDevice.VolumeType = aws.String(string(v.Type)) 612 } 613 614 return &ec2.BlockDeviceMapping{ 615 DeviceName: &v.DeviceName, 616 Ebs: ebsDevice, 617 } 618 } 619 620 // GetInstanceSecurityGroups returns a map from ENI id to the security groups applied to that ENI 621 // While some security group operations take place at the "instance" level, these are in fact an API convenience for manipulating the first ("primary") ENI's properties. 622 func (s *Service) GetInstanceSecurityGroups(instanceID string) (map[string][]string, error) { 623 enis, err := s.getInstanceENIs(instanceID) 624 if err != nil { 625 return nil, errors.Wrapf(err, "failed to get ENIs for instance %q", instanceID) 626 } 627 628 out := make(map[string][]string) 629 for _, eni := range enis { 630 var groups []string 631 for _, group := range eni.Groups { 632 groups = append(groups, aws.StringValue(group.GroupId)) 633 } 634 out[aws.StringValue(eni.NetworkInterfaceId)] = groups 635 } 636 return out, nil 637 } 638 639 // UpdateInstanceSecurityGroups modifies the security groups of the given 640 // EC2 instance. 641 func (s *Service) UpdateInstanceSecurityGroups(instanceID string, ids []string) error { 642 s.scope.V(2).Info("Attempting to update security groups on instance", "instance-id", instanceID) 643 644 enis, err := s.getInstanceENIs(instanceID) 645 if err != nil { 646 return errors.Wrapf(err, "failed to get ENIs for instance %q", instanceID) 647 } 648 649 s.scope.V(3).Info("Found ENIs on instance", "number-of-enis", len(enis), "instance-id", instanceID) 650 651 for _, eni := range enis { 652 if err := s.attachSecurityGroupsToNetworkInterface(ids, aws.StringValue(eni.NetworkInterfaceId)); err != nil { 653 return errors.Wrapf(err, "failed to modify network interfaces on instance %q", instanceID) 654 } 655 } 656 657 return nil 658 } 659 660 // UpdateResourceTags updates the tags for an instance. 661 // This will be called if there is anything to create (update) or delete. 662 // We may not always have to perform each action, so we check what we're 663 // receiving to avoid calling AWS if we don't need to. 664 func (s *Service) UpdateResourceTags(resourceID *string, create, remove map[string]string) error { 665 s.scope.V(2).Info("Attempting to update tags on resource", "resource-id", *resourceID) 666 667 // If we have anything to create or update 668 if len(create) > 0 { 669 s.scope.V(2).Info("Attempting to create tags on resource", "resource-id", *resourceID) 670 671 // Convert our create map into an array of *ec2.Tag 672 createTagsInput := converters.MapToTags(create) 673 674 // Create the CreateTags input. 675 input := &ec2.CreateTagsInput{ 676 Resources: []*string{resourceID}, 677 Tags: createTagsInput, 678 } 679 680 // Create/Update tags in AWS. 681 if _, err := s.EC2Client.CreateTags(input); err != nil { 682 return errors.Wrapf(err, "failed to create tags for resource %q: %+v", *resourceID, create) 683 } 684 } 685 686 // If we have anything to remove 687 if len(remove) > 0 { 688 s.scope.V(2).Info("Attempting to delete tags on resource", "resource-id", *resourceID) 689 690 // Convert our remove map into an array of *ec2.Tag 691 removeTagsInput := converters.MapToTags(remove) 692 693 // Create the DeleteTags input 694 input := &ec2.DeleteTagsInput{ 695 Resources: []*string{resourceID}, 696 Tags: removeTagsInput, 697 } 698 699 // Delete tags in AWS. 700 if _, err := s.EC2Client.DeleteTags(input); err != nil { 701 return errors.Wrapf(err, "failed to delete tags for resource %q: %v", *resourceID, remove) 702 } 703 } 704 705 return nil 706 } 707 708 func (s *Service) getInstanceENIs(instanceID string) ([]*ec2.NetworkInterface, error) { 709 input := &ec2.DescribeNetworkInterfacesInput{ 710 Filters: []*ec2.Filter{ 711 { 712 Name: aws.String("attachment.instance-id"), 713 Values: []*string{aws.String(instanceID)}, 714 }, 715 }, 716 } 717 718 output, err := s.EC2Client.DescribeNetworkInterfaces(input) 719 if err != nil { 720 return nil, err 721 } 722 723 return output.NetworkInterfaces, nil 724 } 725 726 func (s *Service) getImageRootDevice(imageID string) (*string, error) { 727 input := &ec2.DescribeImagesInput{ 728 ImageIds: []*string{aws.String(imageID)}, 729 } 730 731 output, err := s.EC2Client.DescribeImages(input) 732 if err != nil { 733 return nil, err 734 } 735 736 if len(output.Images) == 0 { 737 return nil, errors.Errorf("no images returned when looking up ID %q", imageID) 738 } 739 740 return output.Images[0].RootDeviceName, nil 741 } 742 743 func (s *Service) getImageSnapshotSize(imageID string) (*int64, error) { 744 input := &ec2.DescribeImagesInput{ 745 ImageIds: []*string{aws.String(imageID)}, 746 } 747 748 output, err := s.EC2Client.DescribeImages(input) 749 if err != nil { 750 return nil, err 751 } 752 753 if len(output.Images) == 0 { 754 return nil, errors.Errorf("no images returned when looking up ID %q", imageID) 755 } 756 757 if len(output.Images[0].BlockDeviceMappings) == 0 { 758 return nil, errors.Errorf("no block device mappings returned when looking up ID %q", imageID) 759 } 760 761 if output.Images[0].BlockDeviceMappings[0].Ebs == nil { 762 return nil, errors.Errorf("no EBS returned when looking up ID %q", imageID) 763 } 764 765 if output.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize == nil { 766 return nil, errors.Errorf("no EBS volume size returned when looking up ID %q", imageID) 767 } 768 769 return output.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize, nil 770 } 771 772 // SDKToInstance converts an AWS EC2 SDK instance to the CAPA instance type. 773 // SDKToInstance populates all instance fields except for rootVolumeSize, 774 // because EC2.DescribeInstances does not return the size of storage devices. An 775 // additional call to EC2 is required to get this value. 776 func (s *Service) SDKToInstance(v *ec2.Instance) (*infrav1.Instance, error) { 777 i := &infrav1.Instance{ 778 ID: aws.StringValue(v.InstanceId), 779 State: infrav1.InstanceState(*v.State.Name), 780 Type: aws.StringValue(v.InstanceType), 781 SubnetID: aws.StringValue(v.SubnetId), 782 ImageID: aws.StringValue(v.ImageId), 783 SSHKeyName: v.KeyName, 784 PrivateIP: v.PrivateIpAddress, 785 PublicIP: v.PublicIpAddress, 786 ENASupport: v.EnaSupport, 787 EBSOptimized: v.EbsOptimized, 788 } 789 790 // Extract IAM Instance Profile name from ARN 791 // TODO: Handle this comparison more safely, perhaps by querying IAM for the 792 // instance profile ARN and comparing to the ARN returned by EC2 793 if v.IamInstanceProfile != nil && v.IamInstanceProfile.Arn != nil { 794 split := strings.Split(aws.StringValue(v.IamInstanceProfile.Arn), "instance-profile/") 795 if len(split) > 1 && split[1] != "" { 796 i.IAMProfile = split[1] 797 } 798 } 799 800 for _, sg := range v.SecurityGroups { 801 i.SecurityGroupIDs = append(i.SecurityGroupIDs, *sg.GroupId) 802 } 803 804 if len(v.Tags) > 0 { 805 i.Tags = converters.TagsToMap(v.Tags) 806 } 807 808 i.Addresses = s.getInstanceAddresses(v) 809 810 i.AvailabilityZone = aws.StringValue(v.Placement.AvailabilityZone) 811 812 for _, volume := range v.BlockDeviceMappings { 813 i.VolumeIDs = append(i.VolumeIDs, *volume.Ebs.VolumeId) 814 } 815 816 return i, nil 817 } 818 819 func (s *Service) getInstanceAddresses(instance *ec2.Instance) []clusterv1.MachineAddress { 820 addresses := []clusterv1.MachineAddress{} 821 for _, eni := range instance.NetworkInterfaces { 822 privateDNSAddress := clusterv1.MachineAddress{ 823 Type: clusterv1.MachineInternalDNS, 824 Address: aws.StringValue(eni.PrivateDnsName), 825 } 826 privateIPAddress := clusterv1.MachineAddress{ 827 Type: clusterv1.MachineInternalIP, 828 Address: aws.StringValue(eni.PrivateIpAddress), 829 } 830 addresses = append(addresses, privateDNSAddress, privateIPAddress) 831 832 // An elastic IP is attached if association is non nil pointer 833 if eni.Association != nil { 834 publicDNSAddress := clusterv1.MachineAddress{ 835 Type: clusterv1.MachineExternalDNS, 836 Address: aws.StringValue(eni.Association.PublicDnsName), 837 } 838 publicIPAddress := clusterv1.MachineAddress{ 839 Type: clusterv1.MachineExternalIP, 840 Address: aws.StringValue(eni.Association.PublicIp), 841 } 842 addresses = append(addresses, publicDNSAddress, publicIPAddress) 843 } 844 } 845 return addresses 846 } 847 848 func (s *Service) getNetworkInterfaceSecurityGroups(interfaceID string) ([]string, error) { 849 input := &ec2.DescribeNetworkInterfaceAttributeInput{ 850 Attribute: aws.String("groupSet"), 851 NetworkInterfaceId: aws.String(interfaceID), 852 } 853 854 output, err := s.EC2Client.DescribeNetworkInterfaceAttribute(input) 855 if err != nil { 856 return nil, err 857 } 858 859 groups := make([]string, len(output.Groups)) 860 for i := range output.Groups { 861 groups[i] = aws.StringValue(output.Groups[i].GroupId) 862 } 863 864 return groups, nil 865 } 866 867 func (s *Service) attachSecurityGroupsToNetworkInterface(groups []string, interfaceID string) error { 868 existingGroups, err := s.getNetworkInterfaceSecurityGroups(interfaceID) 869 if err != nil { 870 return errors.Wrapf(err, "failed to look up network interface security groups: %+v", err) 871 } 872 873 totalGroups := make([]string, len(existingGroups)) 874 copy(totalGroups, existingGroups) 875 876 for _, group := range groups { 877 if !containsGroup(existingGroups, group) { 878 totalGroups = append(totalGroups, group) 879 } 880 } 881 882 // no new groups to attach 883 if len(existingGroups) == len(totalGroups) { 884 return nil 885 } 886 887 s.scope.Info("Updating security groups", "groups", totalGroups) 888 889 input := &ec2.ModifyNetworkInterfaceAttributeInput{ 890 NetworkInterfaceId: aws.String(interfaceID), 891 Groups: aws.StringSlice(totalGroups), 892 } 893 894 if _, err := s.EC2Client.ModifyNetworkInterfaceAttribute(input); err != nil { 895 return errors.Wrapf(err, "failed to modify interface %q to have security groups %v", interfaceID, totalGroups) 896 } 897 return nil 898 } 899 900 // DetachSecurityGroupsFromNetworkInterface looks up an ENI by interfaceID and 901 // detaches a list of Security Groups from that ENI. 902 func (s *Service) DetachSecurityGroupsFromNetworkInterface(groups []string, interfaceID string) error { 903 existingGroups, err := s.getNetworkInterfaceSecurityGroups(interfaceID) 904 if err != nil { 905 return errors.Wrapf(err, "failed to look up network interface security groups") 906 } 907 908 remainingGroups := existingGroups 909 for _, group := range groups { 910 remainingGroups = filterGroups(remainingGroups, group) 911 } 912 913 input := &ec2.ModifyNetworkInterfaceAttributeInput{ 914 NetworkInterfaceId: aws.String(interfaceID), 915 Groups: aws.StringSlice(remainingGroups), 916 } 917 918 if _, err := s.EC2Client.ModifyNetworkInterfaceAttribute(input); err != nil { 919 return errors.Wrapf(err, "failed to modify interface %q", interfaceID) 920 } 921 return nil 922 } 923 924 // checkRootVolume checks the input root volume options against the requested AMI's defaults 925 // and returns the AMI's root device name. 926 func (s *Service) checkRootVolume(rootVolume *infrav1.Volume, imageID string) (*string, error) { 927 rootDeviceName, err := s.getImageRootDevice(imageID) 928 if err != nil { 929 return nil, errors.Wrapf(err, "failed to get root volume from image %q", imageID) 930 } 931 932 snapshotSize, err := s.getImageSnapshotSize(imageID) 933 if err != nil { 934 return nil, errors.Wrapf(err, "failed to get root volume from image %q", imageID) 935 } 936 937 if rootVolume.Size < *snapshotSize { 938 return nil, errors.Errorf("root volume size (%d) must be greater than or equal to snapshot size (%d)", rootVolume.Size, *snapshotSize) 939 } 940 941 return rootDeviceName, nil 942 } 943 944 // filterGroups filters a list for a string. 945 func filterGroups(list []string, strToFilter string) (newList []string) { 946 for _, item := range list { 947 if item != strToFilter { 948 newList = append(newList, item) 949 } 950 } 951 return 952 } 953 954 // containsGroup returns true if a list contains a string. 955 func containsGroup(list []string, strToSearch string) bool { 956 for _, item := range list { 957 if item == strToSearch { 958 return true 959 } 960 } 961 return false 962 } 963 964 func getInstanceMarketOptionsRequest(spotMarketOptions *infrav1.SpotMarketOptions) *ec2.InstanceMarketOptionsRequest { 965 if spotMarketOptions == nil { 966 // Instance is not a Spot instance 967 return nil 968 } 969 970 // Set required values for Spot instances 971 spotOptions := &ec2.SpotMarketOptions{} 972 973 // The following two options ensure that: 974 // - If an instance is interrupted, it is terminated rather than hibernating or stopping 975 // - No replacement instance will be created if the instance is interrupted 976 // - If the spot request cannot immediately be fulfilled, it will not be created 977 // This behaviour should satisfy the 1:1 mapping of Machines to Instances as 978 // assumed by the Cluster API. 979 spotOptions.SetInstanceInterruptionBehavior(ec2.InstanceInterruptionBehaviorTerminate) 980 spotOptions.SetSpotInstanceType(ec2.SpotInstanceTypeOneTime) 981 982 maxPrice := spotMarketOptions.MaxPrice 983 if maxPrice != nil && *maxPrice != "" { 984 spotOptions.SetMaxPrice(*maxPrice) 985 } 986 987 instanceMarketOptionsRequest := &ec2.InstanceMarketOptionsRequest{} 988 instanceMarketOptionsRequest.SetMarketType(ec2.MarketTypeSpot) 989 instanceMarketOptionsRequest.SetSpotOptions(spotOptions) 990 991 return instanceMarketOptionsRequest 992 }