sigs.k8s.io/cluster-api-provider-aws@v1.5.5/controllers/awscluster_controller.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net"
    23  	"time"
    24  
    25  	"github.com/go-logr/logr"
    26  	"github.com/google/go-cmp/cmp"
    27  	"github.com/pkg/errors"
    28  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/client-go/tools/record"
    31  	ctrl "sigs.k8s.io/controller-runtime"
    32  	"sigs.k8s.io/controller-runtime/pkg/client"
    33  	"sigs.k8s.io/controller-runtime/pkg/controller"
    34  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    35  	"sigs.k8s.io/controller-runtime/pkg/event"
    36  	"sigs.k8s.io/controller-runtime/pkg/handler"
    37  	"sigs.k8s.io/controller-runtime/pkg/predicate"
    38  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    39  	"sigs.k8s.io/controller-runtime/pkg/source"
    40  
    41  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    42  	"sigs.k8s.io/cluster-api-provider-aws/feature"
    43  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope"
    44  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services"
    45  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/ec2"
    46  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/elb"
    47  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/gc"
    48  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/instancestate"
    49  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/network"
    50  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/s3"
    51  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/securitygroup"
    52  	infrautilconditions "sigs.k8s.io/cluster-api-provider-aws/util/conditions"
    53  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    54  	"sigs.k8s.io/cluster-api/util"
    55  	capiannotations "sigs.k8s.io/cluster-api/util/annotations"
    56  	"sigs.k8s.io/cluster-api/util/conditions"
    57  	"sigs.k8s.io/cluster-api/util/patch"
    58  	"sigs.k8s.io/cluster-api/util/predicates"
    59  )
    60  
// defaultAWSSecurityGroupRoles lists the security group roles that
// securityGroupRolesForCluster starts from for every cluster. The bastion role
// is not part of the defaults; it is appended there only when the cluster's
// bastion is enabled.
var defaultAWSSecurityGroupRoles = []infrav1.SecurityGroupRole{
	infrav1.SecurityGroupAPIServerLB,
	infrav1.SecurityGroupLB,
	infrav1.SecurityGroupControlPlane,
	infrav1.SecurityGroupNode,
}
    67  
// AWSClusterReconciler reconciles an AWSCluster object.
//
// The unexported *Factory fields are test seams: when set, the matching
// get*Service method returns the factory's result instead of constructing the
// real AWS-backed service. Endpoints is handed to the cluster scope created in
// Reconcile, WatchFilterValue is passed to the filter-label event predicate in
// SetupWithManager, and ExternalResourceGC controls whether the gc service's
// ReconcileDelete runs during cluster deletion.
type AWSClusterReconciler struct {
	client.Client
	Recorder              record.EventRecorder
	ec2ServiceFactory     func(scope.EC2Scope) services.EC2Interface
	networkServiceFactory func(scope.ClusterScope) services.NetworkInterface
	elbServiceFactory     func(scope.ELBScope) services.ELBInterface
	securityGroupFactory  func(scope.ClusterScope) services.SecurityGroupInterface
	Endpoints             []scope.ServiceEndpoint
	WatchFilterValue      string
	ExternalResourceGC    bool
}
    80  
    81  // getEC2Service factory func is added for testing purpose so that we can inject mocked EC2Service to the AWSClusterReconciler.
    82  func (r *AWSClusterReconciler) getEC2Service(scope scope.EC2Scope) services.EC2Interface {
    83  	if r.ec2ServiceFactory != nil {
    84  		return r.ec2ServiceFactory(scope)
    85  	}
    86  	return ec2.NewService(scope)
    87  }
    88  
    89  // getELBService factory func is added for testing purpose so that we can inject mocked ELBService to the AWSClusterReconciler.
    90  func (r *AWSClusterReconciler) getELBService(scope scope.ELBScope) services.ELBInterface {
    91  	if r.elbServiceFactory != nil {
    92  		return r.elbServiceFactory(scope)
    93  	}
    94  	return elb.NewService(scope)
    95  }
    96  
    97  // getNetworkService factory func is added for testing purpose so that we can inject mocked NetworkService to the AWSClusterReconciler.
    98  func (r *AWSClusterReconciler) getNetworkService(scope scope.ClusterScope) services.NetworkInterface {
    99  	if r.networkServiceFactory != nil {
   100  		return r.networkServiceFactory(scope)
   101  	}
   102  	return network.NewService(&scope)
   103  }
   104  
   105  // securityGroupRolesForCluster returns the security group roles determined by the cluster configuration.
   106  func securityGroupRolesForCluster(scope scope.ClusterScope) []infrav1.SecurityGroupRole {
   107  	// Copy to ensure we do not modify the package-level variable.
   108  	roles := make([]infrav1.SecurityGroupRole, len(defaultAWSSecurityGroupRoles))
   109  	copy(roles, defaultAWSSecurityGroupRoles)
   110  
   111  	if scope.Bastion().Enabled {
   112  		roles = append(roles, infrav1.SecurityGroupBastion)
   113  	}
   114  	return roles
   115  }
   116  
   117  // getSecurityGroupService factory func is added for testing purpose so that we can inject mocked SecurityGroupService to the AWSClusterReconciler.
   118  func (r *AWSClusterReconciler) getSecurityGroupService(scope scope.ClusterScope) services.SecurityGroupInterface {
   119  	if r.securityGroupFactory != nil {
   120  		return r.securityGroupFactory(scope)
   121  	}
   122  	return securitygroup.NewService(&scope, securityGroupRolesForCluster(scope))
   123  }
   124  
   125  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch;create;update;patch;delete
   126  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters/status,verbs=get;update;patch
   127  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
   128  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusterroleidentities;awsclusterstaticidentities,verbs=get;list;watch
   129  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclustercontrolleridentities,verbs=get;list;watch;create;
   130  
   131  func (r *AWSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   132  	log := ctrl.LoggerFrom(ctx)
   133  
   134  	// Fetch the AWSCluster instance
   135  	awsCluster := &infrav1.AWSCluster{}
   136  	err := r.Get(ctx, req.NamespacedName, awsCluster)
   137  	if err != nil {
   138  		if apierrors.IsNotFound(err) {
   139  			return reconcile.Result{}, nil
   140  		}
   141  		return reconcile.Result{}, err
   142  	}
   143  
   144  	// Fetch the Cluster.
   145  	cluster, err := util.GetOwnerCluster(ctx, r.Client, awsCluster.ObjectMeta)
   146  	if err != nil {
   147  		return reconcile.Result{}, err
   148  	}
   149  
   150  	if cluster == nil {
   151  		log.Info("Cluster Controller has not yet set OwnerRef")
   152  		return reconcile.Result{}, nil
   153  	}
   154  
   155  	if capiannotations.IsPaused(cluster, awsCluster) {
   156  		log.Info("AWSCluster or linked Cluster is marked as paused. Won't reconcile")
   157  		return reconcile.Result{}, nil
   158  	}
   159  
   160  	log = log.WithValues("cluster", cluster.Name)
   161  	helper, err := patch.NewHelper(awsCluster, r.Client)
   162  	if err != nil {
   163  		return reconcile.Result{}, errors.Wrap(err, "failed to init patch helper")
   164  	}
   165  
   166  	defer func() {
   167  		e := helper.Patch(
   168  			context.TODO(),
   169  			awsCluster,
   170  			patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   171  				infrav1.PrincipalCredentialRetrievedCondition,
   172  				infrav1.PrincipalUsageAllowedCondition,
   173  				infrav1.LoadBalancerReadyCondition,
   174  			}})
   175  		if e != nil {
   176  			fmt.Println(e.Error())
   177  		}
   178  	}()
   179  
   180  	// Create the scope.
   181  	clusterScope, err := scope.NewClusterScope(scope.ClusterScopeParams{
   182  		Client:         r.Client,
   183  		Logger:         &log,
   184  		Cluster:        cluster,
   185  		AWSCluster:     awsCluster,
   186  		ControllerName: "awscluster",
   187  		Endpoints:      r.Endpoints,
   188  	})
   189  	if err != nil {
   190  		return reconcile.Result{}, errors.Errorf("failed to create scope: %+v", err)
   191  	}
   192  
   193  	// Always close the scope when exiting this function so we can persist any AWSCluster changes.
   194  	defer func() {
   195  		if err := clusterScope.Close(); err != nil && reterr == nil {
   196  			reterr = err
   197  		}
   198  	}()
   199  
   200  	// Handle deleted clusters
   201  	if !awsCluster.DeletionTimestamp.IsZero() {
   202  		return r.reconcileDelete(ctx, clusterScope)
   203  	}
   204  
   205  	// Handle non-deleted clusters
   206  	return r.reconcileNormal(clusterScope)
   207  }
   208  
   209  func (r *AWSClusterReconciler) reconcileDelete(ctx context.Context, clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   210  	clusterScope.Info("Reconciling AWSCluster delete")
   211  
   212  	ec2svc := r.getEC2Service(clusterScope)
   213  	elbsvc := r.getELBService(clusterScope)
   214  	networkSvc := r.getNetworkService(*clusterScope)
   215  	sgService := r.getSecurityGroupService(*clusterScope)
   216  	s3Service := s3.NewService(clusterScope)
   217  
   218  	if feature.Gates.Enabled(feature.EventBridgeInstanceState) {
   219  		instancestateSvc := instancestate.NewService(clusterScope)
   220  		if err := instancestateSvc.DeleteEC2Events(); err != nil {
   221  			// Not deleting the events isn't critical to cluster deletion
   222  			clusterScope.Error(err, "non-fatal: failed to delete EventBridge notifications")
   223  		}
   224  	}
   225  
   226  	if err := elbsvc.DeleteLoadbalancers(); err != nil {
   227  		clusterScope.Error(err, "error deleting load balancer")
   228  		return reconcile.Result{}, err
   229  	}
   230  
   231  	if err := ec2svc.DeleteBastion(); err != nil {
   232  		clusterScope.Error(err, "error deleting bastion")
   233  		return reconcile.Result{}, err
   234  	}
   235  
   236  	if err := sgService.DeleteSecurityGroups(); err != nil {
   237  		clusterScope.Error(err, "error deleting security groups")
   238  		return reconcile.Result{}, err
   239  	}
   240  
   241  	if r.ExternalResourceGC {
   242  		gcSvc := gc.NewService(clusterScope)
   243  		if gcErr := gcSvc.ReconcileDelete(ctx); gcErr != nil {
   244  			return reconcile.Result{}, fmt.Errorf("failed delete reconcile for gc service: %w", gcErr)
   245  		}
   246  	}
   247  
   248  	if err := networkSvc.DeleteNetwork(); err != nil {
   249  		clusterScope.Error(err, "error deleting network")
   250  		return reconcile.Result{}, err
   251  	}
   252  
   253  	if err := s3Service.DeleteBucket(); err != nil {
   254  		return reconcile.Result{}, errors.Wrapf(err, "error deleting S3 Bucket")
   255  	}
   256  
   257  	// Cluster is deleted so remove the finalizer.
   258  	controllerutil.RemoveFinalizer(clusterScope.AWSCluster, infrav1.ClusterFinalizer)
   259  
   260  	return reconcile.Result{}, nil
   261  }
   262  
   263  func (r *AWSClusterReconciler) reconcileNormal(clusterScope *scope.ClusterScope) (reconcile.Result, error) {
   264  	clusterScope.Info("Reconciling AWSCluster")
   265  
   266  	awsCluster := clusterScope.AWSCluster
   267  
   268  	// If the AWSCluster doesn't have our finalizer, add it.
   269  	controllerutil.AddFinalizer(awsCluster, infrav1.ClusterFinalizer)
   270  	// Register the finalizer immediately to avoid orphaning AWS resources on delete
   271  	if err := clusterScope.PatchObject(); err != nil {
   272  		return reconcile.Result{}, err
   273  	}
   274  
   275  	ec2Service := r.getEC2Service(clusterScope)
   276  	elbService := r.getELBService(clusterScope)
   277  	networkSvc := r.getNetworkService(*clusterScope)
   278  	sgService := r.getSecurityGroupService(*clusterScope)
   279  	s3Service := s3.NewService(clusterScope)
   280  
   281  	if err := networkSvc.ReconcileNetwork(); err != nil {
   282  		clusterScope.Error(err, "failed to reconcile network")
   283  		return reconcile.Result{}, err
   284  	}
   285  
   286  	// CNI related security groups gets deleted from the AWSClusters created prior to networkSpec.cni defaulting (5.5) after upgrading controllers.
   287  	// https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/2084
   288  	// TODO: Remove this after v1aplha4
   289  	clusterScope.AWSCluster.Default()
   290  
   291  	if err := sgService.ReconcileSecurityGroups(); err != nil {
   292  		clusterScope.Error(err, "failed to reconcile security groups")
   293  		conditions.MarkFalse(awsCluster, infrav1.ClusterSecurityGroupsReadyCondition, infrav1.ClusterSecurityGroupReconciliationFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error())
   294  		return reconcile.Result{}, err
   295  	}
   296  
   297  	if err := ec2Service.ReconcileBastion(); err != nil {
   298  		conditions.MarkFalse(awsCluster, infrav1.BastionHostReadyCondition, infrav1.BastionHostFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error())
   299  		clusterScope.Error(err, "failed to reconcile bastion host")
   300  		return reconcile.Result{}, err
   301  	}
   302  
   303  	if feature.Gates.Enabled(feature.EventBridgeInstanceState) {
   304  		instancestateSvc := instancestate.NewService(clusterScope)
   305  		if err := instancestateSvc.ReconcileEC2Events(); err != nil {
   306  			// non fatal error, so we continue
   307  			clusterScope.Error(err, "non-fatal: failed to set up EventBridge")
   308  		}
   309  	}
   310  
   311  	if err := elbService.ReconcileLoadbalancers(); err != nil {
   312  		clusterScope.Error(err, "failed to reconcile load balancer")
   313  		conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.LoadBalancerFailedReason, infrautilconditions.ErrorConditionAfterInit(clusterScope.ClusterObj()), err.Error())
   314  		return reconcile.Result{}, err
   315  	}
   316  
   317  	if err := s3Service.ReconcileBucket(); err != nil {
   318  		conditions.MarkFalse(awsCluster, infrav1.S3BucketReadyCondition, infrav1.S3BucketFailedReason, clusterv1.ConditionSeverityError, err.Error())
   319  		return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile S3 Bucket for AWSCluster %s/%s", awsCluster.Namespace, awsCluster.Name)
   320  	}
   321  
   322  	if awsCluster.Status.Network.APIServerELB.DNSName == "" {
   323  		conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameReason, clusterv1.ConditionSeverityInfo, "")
   324  		clusterScope.Info("Waiting on API server ELB DNS name")
   325  		return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
   326  	}
   327  
   328  	if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil {
   329  		conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "")
   330  		clusterScope.Info("Waiting on API server ELB DNS name to resolve")
   331  		return reconcile.Result{RequeueAfter: 15 * time.Second}, nil // nolint:nilerr
   332  	}
   333  	conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition)
   334  
   335  	awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{
   336  		Host: awsCluster.Status.Network.APIServerELB.DNSName,
   337  		Port: clusterScope.APIServerPort(),
   338  	}
   339  
   340  	for _, subnet := range clusterScope.Subnets().FilterPrivate() {
   341  		found := false
   342  		for _, az := range awsCluster.Status.Network.APIServerELB.AvailabilityZones {
   343  			if az == subnet.AvailabilityZone {
   344  				found = true
   345  				break
   346  			}
   347  		}
   348  
   349  		clusterScope.SetFailureDomain(subnet.AvailabilityZone, clusterv1.FailureDomainSpec{
   350  			ControlPlane: found,
   351  		})
   352  	}
   353  
   354  	awsCluster.Status.Ready = true
   355  	return reconcile.Result{}, nil
   356  }
   357  
   358  func (r *AWSClusterReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
   359  	log := ctrl.LoggerFrom(ctx)
   360  	controller, err := ctrl.NewControllerManagedBy(mgr).
   361  		WithOptions(options).
   362  		For(&infrav1.AWSCluster{}).
   363  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(log, r.WatchFilterValue)).
   364  		WithEventFilter(
   365  			predicate.Funcs{
   366  				// Avoid reconciling if the event triggering the reconciliation is related to incremental status updates
   367  				// for AWSCluster resources only
   368  				UpdateFunc: func(e event.UpdateEvent) bool {
   369  					if e.ObjectOld.GetObjectKind().GroupVersionKind().Kind != "AWSCluster" {
   370  						return true
   371  					}
   372  
   373  					oldCluster := e.ObjectOld.(*infrav1.AWSCluster).DeepCopy()
   374  					newCluster := e.ObjectNew.(*infrav1.AWSCluster).DeepCopy()
   375  
   376  					oldCluster.Status = infrav1.AWSClusterStatus{}
   377  					newCluster.Status = infrav1.AWSClusterStatus{}
   378  
   379  					oldCluster.ObjectMeta.ResourceVersion = ""
   380  					newCluster.ObjectMeta.ResourceVersion = ""
   381  
   382  					return !cmp.Equal(oldCluster, newCluster)
   383  				},
   384  			},
   385  		).
   386  		WithEventFilter(predicates.ResourceIsNotExternallyManaged(log)).
   387  		Build(r)
   388  	if err != nil {
   389  		return errors.Wrap(err, "error creating controller")
   390  	}
   391  
   392  	return controller.Watch(
   393  		&source.Kind{Type: &clusterv1.Cluster{}},
   394  		handler.EnqueueRequestsFromMapFunc(r.requeueAWSClusterForUnpausedCluster(ctx, log)),
   395  		predicates.ClusterUnpaused(log),
   396  	)
   397  }
   398  
   399  func (r *AWSClusterReconciler) requeueAWSClusterForUnpausedCluster(ctx context.Context, log logr.Logger) handler.MapFunc {
   400  	return func(o client.Object) []ctrl.Request {
   401  		c, ok := o.(*clusterv1.Cluster)
   402  		if !ok {
   403  			panic(fmt.Sprintf("Expected a Cluster but got a %T", o))
   404  		}
   405  
   406  		log := log.WithValues("objectMapper", "clusterToAWSCluster", "namespace", c.Namespace, "cluster", c.Name)
   407  
   408  		// Don't handle deleted clusters
   409  		if !c.ObjectMeta.DeletionTimestamp.IsZero() {
   410  			log.V(4).Info("Cluster has a deletion timestamp, skipping mapping.")
   411  			return nil
   412  		}
   413  
   414  		// Make sure the ref is set
   415  		if c.Spec.InfrastructureRef == nil {
   416  			log.V(4).Info("Cluster does not have an InfrastructureRef, skipping mapping.")
   417  			return nil
   418  		}
   419  
   420  		if c.Spec.InfrastructureRef.GroupVersionKind().Kind != "AWSCluster" {
   421  			log.V(4).Info("Cluster has an InfrastructureRef for a different type, skipping mapping.")
   422  			return nil
   423  		}
   424  
   425  		awsCluster := &infrav1.AWSCluster{}
   426  		key := types.NamespacedName{Namespace: c.Spec.InfrastructureRef.Namespace, Name: c.Spec.InfrastructureRef.Name}
   427  
   428  		if err := r.Get(ctx, key, awsCluster); err != nil {
   429  			log.V(4).Error(err, "Failed to get AWS cluster")
   430  			return nil
   431  		}
   432  
   433  		if capiannotations.IsExternallyManaged(awsCluster) {
   434  			log.V(4).Info("AWSCluster is externally managed, skipping mapping.")
   435  			return nil
   436  		}
   437  
   438  		log.V(4).Info("Adding request.", "awsCluster", c.Spec.InfrastructureRef.Name)
   439  		return []ctrl.Request{
   440  			{
   441  				NamespacedName: client.ObjectKey{Namespace: c.Namespace, Name: c.Spec.InfrastructureRef.Name},
   442  			},
   443  		}
   444  	}
   445  }