sigs.k8s.io/cluster-api-provider-aws@v1.5.5/pkg/cloud/services/network/natgateways.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package network
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/aws/aws-sdk-go/aws"
    23  	"github.com/aws/aws-sdk-go/service/ec2"
    24  	"github.com/pkg/errors"
    25  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    26  
    27  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    28  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/awserrors"
    29  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/converters"
    30  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/filter"
    31  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services"
    32  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/wait"
    33  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/tags"
    34  	"sigs.k8s.io/cluster-api-provider-aws/pkg/record"
    35  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    36  	"sigs.k8s.io/cluster-api/util/conditions"
    37  )
    38  
    39  func (s *Service) reconcileNatGateways() error {
    40  	if s.scope.VPC().IsUnmanaged(s.scope.Name()) {
    41  		s.scope.V(4).Info("Skipping NAT gateway reconcile in unmanaged mode")
    42  		return nil
    43  	}
    44  
    45  	s.scope.V(2).Info("Reconciling NAT gateways")
    46  
    47  	if len(s.scope.Subnets().FilterPrivate()) == 0 {
    48  		s.scope.V(2).Info("No private subnets available, skipping NAT gateways")
    49  		conditions.MarkFalse(
    50  			s.scope.InfraCluster(),
    51  			infrav1.NatGatewaysReadyCondition,
    52  			infrav1.NatGatewaysReconciliationFailedReason,
    53  			clusterv1.ConditionSeverityWarning,
    54  			"No private subnets available, skipping NAT gateways")
    55  		return nil
    56  	} else if len(s.scope.Subnets().FilterPublic()) == 0 {
    57  		s.scope.V(2).Info("No public subnets available. Cannot create NAT gateways for private subnets, this might be a configuration error.")
    58  		conditions.MarkFalse(
    59  			s.scope.InfraCluster(),
    60  			infrav1.NatGatewaysReadyCondition,
    61  			infrav1.NatGatewaysReconciliationFailedReason,
    62  			clusterv1.ConditionSeverityWarning,
    63  			"No public subnets available. Cannot create NAT gateways for private subnets, this might be a configuration error.")
    64  		return nil
    65  	}
    66  
    67  	existing, err := s.describeNatGatewaysBySubnet()
    68  	if err != nil {
    69  		return err
    70  	}
    71  
    72  	subnetIDs := []string{}
    73  
    74  	for _, sn := range s.scope.Subnets().FilterPublic() {
    75  		if sn.ID == "" {
    76  			continue
    77  		}
    78  
    79  		if ngw, ok := existing[sn.ID]; ok {
    80  			// Make sure tags are up to date.
    81  			if err := wait.WaitForWithRetryable(wait.NewBackoff(), func() (bool, error) {
    82  				buildParams := s.getNatGatewayTagParams(*ngw.NatGatewayId)
    83  				tagsBuilder := tags.New(&buildParams, tags.WithEC2(s.EC2Client))
    84  				if err := tagsBuilder.Ensure(converters.TagsToMap(ngw.Tags)); err != nil {
    85  					return false, err
    86  				}
    87  				return true, nil
    88  			}, awserrors.ResourceNotFound); err != nil {
    89  				record.Warnf(s.scope.InfraCluster(), "FailedTagNATGateway", "Failed to tag managed NAT Gateway %q: %v", *ngw.NatGatewayId, err)
    90  				return errors.Wrapf(err, "failed to tag nat gateway %q", *ngw.NatGatewayId)
    91  			}
    92  
    93  			continue
    94  		}
    95  
    96  		subnetIDs = append(subnetIDs, sn.ID)
    97  	}
    98  
    99  	// Batch the creation of NAT gateways
   100  	if len(subnetIDs) > 0 {
   101  		// set NatGatewayCreationStarted if the condition has never been set before
   102  		if !conditions.Has(s.scope.InfraCluster(), infrav1.NatGatewaysReadyCondition) {
   103  			conditions.MarkFalse(s.scope.InfraCluster(), infrav1.NatGatewaysReadyCondition, infrav1.NatGatewaysCreationStartedReason, clusterv1.ConditionSeverityInfo, "")
   104  			if err := s.scope.PatchObject(); err != nil {
   105  				return errors.Wrap(err, "failed to patch conditions")
   106  			}
   107  		}
   108  		ngws, err := s.createNatGateways(subnetIDs)
   109  
   110  		for _, ng := range ngws {
   111  			subnet := s.scope.Subnets().FindByID(*ng.SubnetId)
   112  			subnet.NatGatewayID = ng.NatGatewayId
   113  		}
   114  
   115  		if err != nil {
   116  			return err
   117  		}
   118  		conditions.MarkTrue(s.scope.InfraCluster(), infrav1.NatGatewaysReadyCondition)
   119  	}
   120  
   121  	return nil
   122  }
   123  
   124  func (s *Service) deleteNatGateways() error {
   125  	if s.scope.VPC().IsUnmanaged(s.scope.Name()) {
   126  		s.scope.V(4).Info("Skipping NAT gateway deletion in unmanaged mode")
   127  		return nil
   128  	}
   129  
   130  	if len(s.scope.Subnets().FilterPrivate()) == 0 {
   131  		s.scope.V(2).Info("No private subnets available, skipping NAT gateways")
   132  		return nil
   133  	} else if len(s.scope.Subnets().FilterPublic()) == 0 {
   134  		s.scope.V(2).Info("No public subnets available. Cannot create NAT gateways for private subnets, this might be a configuration error.")
   135  		return nil
   136  	}
   137  
   138  	existing, err := s.describeNatGatewaysBySubnet()
   139  	if err != nil {
   140  		return err
   141  	}
   142  
   143  	var ngIDs []*ec2.NatGateway
   144  	for _, sn := range s.scope.Subnets().FilterPublic() {
   145  		if sn.ID == "" {
   146  			continue
   147  		}
   148  
   149  		if ngID, ok := existing[sn.ID]; ok {
   150  			ngIDs = append(ngIDs, ngID)
   151  		}
   152  	}
   153  
   154  	c := make(chan error, len(ngIDs))
   155  	errs := []error{}
   156  
   157  	for _, ngID := range ngIDs {
   158  		go func(c chan error, ngID *ec2.NatGateway) {
   159  			err := s.deleteNatGateway(*ngID.NatGatewayId)
   160  			c <- err
   161  		}(c, ngID)
   162  	}
   163  
   164  	for i := 0; i < len(ngIDs); i++ {
   165  		ngwErr := <-c
   166  		if ngwErr != nil {
   167  			errs = append(errs, ngwErr)
   168  		}
   169  	}
   170  
   171  	return kerrors.NewAggregate(errs)
   172  }
   173  
   174  func (s *Service) describeNatGatewaysBySubnet() (map[string]*ec2.NatGateway, error) {
   175  	describeNatGatewayInput := &ec2.DescribeNatGatewaysInput{
   176  		Filter: []*ec2.Filter{
   177  			filter.EC2.VPC(s.scope.VPC().ID),
   178  			filter.EC2.NATGatewayStates(ec2.NatGatewayStatePending, ec2.NatGatewayStateAvailable),
   179  		},
   180  	}
   181  
   182  	gateways := make(map[string]*ec2.NatGateway)
   183  
   184  	err := s.EC2Client.DescribeNatGatewaysPages(describeNatGatewayInput,
   185  		func(page *ec2.DescribeNatGatewaysOutput, lastPage bool) bool {
   186  			for _, r := range page.NatGateways {
   187  				gateways[*r.SubnetId] = r
   188  			}
   189  			return !lastPage
   190  		})
   191  	if err != nil {
   192  		record.Eventf(s.scope.InfraCluster(), "FailedDescribeNATGateways", "Failed to describe NAT gateways with VPC ID %q: %v", s.scope.VPC().ID, err)
   193  		return nil, errors.Wrapf(err, "failed to describe NAT gateways with VPC ID %q", s.scope.VPC().ID)
   194  	}
   195  
   196  	return gateways, nil
   197  }
   198  
   199  func (s *Service) getNatGatewayTagParams(id string) infrav1.BuildParams {
   200  	name := fmt.Sprintf("%s-nat", s.scope.Name())
   201  
   202  	return infrav1.BuildParams{
   203  		ClusterName: s.scope.Name(),
   204  		ResourceID:  id,
   205  		Lifecycle:   infrav1.ResourceLifecycleOwned,
   206  		Name:        aws.String(name),
   207  		Role:        aws.String(infrav1.CommonRoleTagValue),
   208  		Additional:  s.scope.AdditionalTags(),
   209  	}
   210  }
   211  
   212  func (s *Service) createNatGateways(subnetIDs []string) (natgateways []*ec2.NatGateway, err error) {
   213  	eips, err := s.getOrAllocateAddresses(len(subnetIDs), infrav1.APIServerRoleTagValue)
   214  	if err != nil {
   215  		return nil, errors.Wrapf(err, "failed to create one or more IP addresses for NAT gateways")
   216  	}
   217  	type ngwCreation struct {
   218  		natGateway *ec2.NatGateway
   219  		error      error
   220  	}
   221  	c := make(chan ngwCreation, len(subnetIDs))
   222  
   223  	for i, sn := range subnetIDs {
   224  		go func(c chan ngwCreation, subnetID, ip string) {
   225  			ngw, err := s.createNatGateway(subnetID, ip)
   226  			c <- ngwCreation{natGateway: ngw, error: err}
   227  		}(c, sn, eips[i])
   228  	}
   229  
   230  	for i := 0; i < len(subnetIDs); i++ {
   231  		ngwResult := <-c
   232  		if ngwResult.error != nil {
   233  			return nil, err
   234  		}
   235  		natgateways = append(natgateways, ngwResult.natGateway)
   236  	}
   237  	return natgateways, nil
   238  }
   239  
   240  func (s *Service) createNatGateway(subnetID, ip string) (*ec2.NatGateway, error) {
   241  	var out *ec2.CreateNatGatewayOutput
   242  	var err error
   243  
   244  	if err := wait.WaitForWithRetryable(wait.NewBackoff(), func() (bool, error) {
   245  		if out, err = s.EC2Client.CreateNatGateway(&ec2.CreateNatGatewayInput{
   246  			SubnetId:          aws.String(subnetID),
   247  			AllocationId:      aws.String(ip),
   248  			TagSpecifications: []*ec2.TagSpecification{tags.BuildParamsToTagSpecification(ec2.ResourceTypeNatgateway, s.getNatGatewayTagParams(services.TemporaryResourceID))},
   249  		}); err != nil {
   250  			return false, err
   251  		}
   252  		return true, nil
   253  	}, awserrors.InvalidSubnet); err != nil {
   254  		record.Warnf(s.scope.InfraCluster(), "FailedCreateNATGateway", "Failed to create new NAT Gateway: %v", err)
   255  		return nil, errors.Wrapf(err, "failed to create NAT gateway for subnet ID %q", subnetID)
   256  	}
   257  	record.Eventf(s.scope.InfraCluster(), "SuccessfulCreateNATGateway", "Created new NAT Gateway %q", *out.NatGateway.NatGatewayId)
   258  
   259  	wReq := &ec2.DescribeNatGatewaysInput{NatGatewayIds: []*string{out.NatGateway.NatGatewayId}}
   260  	if err := s.EC2Client.WaitUntilNatGatewayAvailable(wReq); err != nil {
   261  		return nil, errors.Wrapf(err, "failed to wait for nat gateway %q in subnet %q", *out.NatGateway.NatGatewayId, subnetID)
   262  	}
   263  
   264  	s.scope.Info("Created NAT gateway for subnet", "nat-gateway-id", *out.NatGateway.NatGatewayId, "subnet-id", subnetID)
   265  	return out.NatGateway, nil
   266  }
   267  
   268  func (s *Service) deleteNatGateway(id string) error {
   269  	_, err := s.EC2Client.DeleteNatGateway(&ec2.DeleteNatGatewayInput{
   270  		NatGatewayId: aws.String(id),
   271  	})
   272  	if err != nil {
   273  		record.Warnf(s.scope.InfraCluster(), "FailedDeleteNATGateway", "Failed to delete NAT Gateway %q previously attached to VPC %q: %v", id, s.scope.VPC().ID, err)
   274  		return errors.Wrapf(err, "failed to delete nat gateway %q", id)
   275  	}
   276  	record.Eventf(s.scope.InfraCluster(), "SuccessfulDeleteNATGateway", "Deleted NAT Gateway %q previously attached to VPC %q", id, s.scope.VPC().ID)
   277  	s.scope.Info("Deleted NAT gateway in VPC", "nat-gateway-id", id, "vpc-id", s.scope.VPC().ID)
   278  
   279  	describeInput := &ec2.DescribeNatGatewaysInput{
   280  		NatGatewayIds: []*string{aws.String(id)},
   281  	}
   282  
   283  	if err := wait.WaitForWithRetryable(wait.NewBackoff(), func() (done bool, err error) {
   284  		out, err := s.EC2Client.DescribeNatGateways(describeInput)
   285  		if err != nil {
   286  			return false, err
   287  		}
   288  
   289  		if out == nil || len(out.NatGateways) == 0 {
   290  			return false, errors.New(fmt.Sprintf("no NAT gateway returned for id %q", id))
   291  		}
   292  
   293  		ng := out.NatGateways[0]
   294  		switch state := ng.State; *state {
   295  		case ec2.NatGatewayStateAvailable, ec2.NatGatewayStateDeleting:
   296  			return false, nil
   297  		case ec2.NatGatewayStateDeleted:
   298  			return true, nil
   299  		case ec2.NatGatewayStatePending:
   300  			return false, errors.Errorf("in pending state")
   301  		case ec2.NatGatewayStateFailed:
   302  			return false, errors.Errorf("in failed state: %q - %s", *ng.FailureCode, *ng.FailureMessage)
   303  		}
   304  
   305  		return false, errors.Errorf("in unknown state")
   306  	}); err != nil {
   307  		return errors.Wrapf(err, "failed to wait for NAT gateway deletion %q", id)
   308  	}
   309  
   310  	return nil
   311  }
   312  
   313  func (s *Service) getNatGatewayForSubnet(sn *infrav1.SubnetSpec) (string, error) {
   314  	if sn.IsPublic {
   315  		return "", errors.Errorf("cannot get NAT gateway for a public subnet, got id %q", sn.ID)
   316  	}
   317  
   318  	azGateways := make(map[string][]string)
   319  	for _, psn := range s.scope.Subnets().FilterPublic() {
   320  		if psn.NatGatewayID == nil {
   321  			continue
   322  		}
   323  
   324  		azGateways[psn.AvailabilityZone] = append(azGateways[psn.AvailabilityZone], *psn.NatGatewayID)
   325  	}
   326  
   327  	if gws, ok := azGateways[sn.AvailabilityZone]; ok && len(gws) > 0 {
   328  		return gws[0], nil
   329  	}
   330  
   331  	return "", errors.Errorf("no nat gateways available in %q for private subnet %q, current state: %+v", sn.AvailabilityZone, sn.ID, azGateways)
   332  }