sigs.k8s.io/cluster-api-provider-aws@v1.5.5/exp/controllers/awsmachinepool_controller.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controllers
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  
    23  	"github.com/go-logr/logr"
    24  	"github.com/google/go-cmp/cmp"
    25  	"github.com/pkg/errors"
    26  	corev1 "k8s.io/api/core/v1"
    27  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/runtime/schema"
    30  	"k8s.io/client-go/tools/record"
    31  	ctrl "sigs.k8s.io/controller-runtime"
    32  	"sigs.k8s.io/controller-runtime/pkg/client"
    33  	"sigs.k8s.io/controller-runtime/pkg/controller"
    34  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    35  	"sigs.k8s.io/controller-runtime/pkg/handler"
    36  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    37  	"sigs.k8s.io/controller-runtime/pkg/source"
    38  
    39  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    40  	"sigs.k8s.io/cluster-api-provider-aws/controllers"
    41  	ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/controlplane/eks/api/v1beta1"
    42  	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/exp/api/v1beta1"
    43  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud"
    44  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope"
    45  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services"
    46  	asg "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/autoscaling"
    47  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/ec2"
    48  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/userdata"
    49  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    50  	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    51  	"sigs.k8s.io/cluster-api/util"
    52  	"sigs.k8s.io/cluster-api/util/conditions"
    53  	"sigs.k8s.io/cluster-api/util/predicates"
    54  )
    55  
// AWSMachinePoolReconciler reconciles a AWSMachinePool object.
type AWSMachinePoolReconciler struct {
	client.Client
	// Recorder emits Kubernetes events for the reconciled AWSMachinePool.
	Recorder record.EventRecorder
	// WatchFilterValue is the label value used to filter watched resources.
	WatchFilterValue string
	// asgServiceFactory, when non-nil, overrides the default autoscaling
	// service constructor (test seam; see getASGService).
	asgServiceFactory func(cloud.ClusterScoper) services.ASGInterface
	// ec2ServiceFactory, when non-nil, overrides the default EC2 service
	// constructor (test seam; see getEC2Service).
	ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface
}
    64  
    65  func (r *AWSMachinePoolReconciler) getASGService(scope cloud.ClusterScoper) services.ASGInterface {
    66  	if r.asgServiceFactory != nil {
    67  		return r.asgServiceFactory(scope)
    68  	}
    69  	return asg.NewService(scope)
    70  }
    71  
    72  func (r *AWSMachinePoolReconciler) getEC2Service(scope scope.EC2Scope) services.EC2Interface {
    73  	if r.ec2ServiceFactory != nil {
    74  		return r.ec2ServiceFactory(scope)
    75  	}
    76  
    77  	return ec2.NewService(scope)
    78  }
    79  
    80  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools,verbs=get;list;watch;create;update;patch;delete
    81  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools/status,verbs=get;update;patch
    82  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch
    83  // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
    84  // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
    85  // +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch
    86  
    87  // Reconcile is the reconciliation loop for AWSMachinePool.
    88  func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
    89  	log := ctrl.LoggerFrom(ctx)
    90  
    91  	// Fetch the AWSMachinePool .
    92  	awsMachinePool := &expinfrav1.AWSMachinePool{}
    93  	err := r.Get(ctx, req.NamespacedName, awsMachinePool)
    94  	if err != nil {
    95  		if apierrors.IsNotFound(err) {
    96  			return ctrl.Result{}, nil
    97  		}
    98  		return ctrl.Result{}, err
    99  	}
   100  
   101  	// Fetch the CAPI MachinePool
   102  	machinePool, err := getOwnerMachinePool(ctx, r.Client, awsMachinePool.ObjectMeta)
   103  	if err != nil {
   104  		return reconcile.Result{}, err
   105  	}
   106  	if machinePool == nil {
   107  		log.Info("MachinePool Controller has not yet set OwnerRef")
   108  		return reconcile.Result{}, nil
   109  	}
   110  	log = log.WithValues("machinePool", machinePool.Name)
   111  
   112  	// Fetch the Cluster.
   113  	cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta)
   114  	if err != nil {
   115  		log.Info("MachinePool is missing cluster label or cluster does not exist")
   116  		return reconcile.Result{}, nil
   117  	}
   118  
   119  	log = log.WithValues("cluster", cluster.Name)
   120  
   121  	infraCluster, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool)
   122  	if err != nil {
   123  		return ctrl.Result{}, errors.New("error getting infra provider cluster or control plane object")
   124  	}
   125  	if infraCluster == nil {
   126  		log.Info("AWSCluster or AWSManagedControlPlane is not ready yet")
   127  		return ctrl.Result{}, nil
   128  	}
   129  
   130  	// Create the machine pool scope
   131  	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
   132  		Client:         r.Client,
   133  		Cluster:        cluster,
   134  		MachinePool:    machinePool,
   135  		InfraCluster:   infraCluster,
   136  		AWSMachinePool: awsMachinePool,
   137  	})
   138  	if err != nil {
   139  		log.Error(err, "failed to create scope")
   140  		return ctrl.Result{}, err
   141  	}
   142  
   143  	// Always close the scope when exiting this function so we can persist any AWSMachine changes.
   144  	defer func() {
   145  		// set Ready condition before AWSMachinePool is patched
   146  		conditions.SetSummary(machinePoolScope.AWSMachinePool,
   147  			conditions.WithConditions(
   148  				expinfrav1.ASGReadyCondition,
   149  				expinfrav1.LaunchTemplateReadyCondition,
   150  			),
   151  			conditions.WithStepCounterIfOnly(
   152  				expinfrav1.ASGReadyCondition,
   153  				expinfrav1.LaunchTemplateReadyCondition,
   154  			),
   155  		)
   156  
   157  		if err := machinePoolScope.Close(); err != nil && reterr == nil {
   158  			reterr = err
   159  		}
   160  	}()
   161  
   162  	switch infraScope := infraCluster.(type) {
   163  	case *scope.ManagedControlPlaneScope:
   164  		if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
   165  			return r.reconcileDelete(machinePoolScope, infraScope, infraScope)
   166  		}
   167  
   168  		return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope)
   169  	case *scope.ClusterScope:
   170  		if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() {
   171  			return r.reconcileDelete(machinePoolScope, infraScope, infraScope)
   172  		}
   173  
   174  		return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope)
   175  	default:
   176  		return ctrl.Result{}, errors.New("infraCluster has unknown type")
   177  	}
   178  }
   179  
// SetupWithManager registers the AWSMachinePool controller with the manager.
// It reconciles AWSMachinePool objects directly, maps events from the owning
// CAPI MachinePool back to the corresponding AWSMachinePool, and filters out
// paused resources and resources not matching WatchFilterValue.
func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	return ctrl.NewControllerManagedBy(mgr).
		WithOptions(options).
		For(&expinfrav1.AWSMachinePool{}).
		Watches(
			&source.Kind{Type: &expclusterv1.MachinePool{}},
			handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(expinfrav1.GroupVersion.WithKind("AWSMachinePool"))),
		).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Complete(r)
}
   191  
   192  func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) (ctrl.Result, error) {
   193  	clusterScope.Info("Reconciling AWSMachinePool")
   194  
   195  	// If the AWSMachine is in an error state, return early.
   196  	if machinePoolScope.HasFailed() {
   197  		machinePoolScope.Info("Error state detected, skipping reconciliation")
   198  
   199  		// TODO: If we are in a failed state, delete the secret regardless of instance state
   200  
   201  		return ctrl.Result{}, nil
   202  	}
   203  
   204  	// If the AWSMachinepool doesn't have our finalizer, add it
   205  	controllerutil.AddFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer)
   206  
   207  	// Register finalizer immediately to avoid orphaning AWS resources
   208  	if err := machinePoolScope.PatchObject(); err != nil {
   209  		return ctrl.Result{}, err
   210  	}
   211  
   212  	if !machinePoolScope.Cluster.Status.InfrastructureReady {
   213  		machinePoolScope.Info("Cluster infrastructure is not ready yet")
   214  		conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
   215  		return ctrl.Result{}, nil
   216  	}
   217  
   218  	// Make sure bootstrap data is available and populated
   219  	if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil {
   220  		machinePoolScope.Info("Bootstrap data secret reference is not yet available")
   221  		conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
   222  		return ctrl.Result{}, nil
   223  	}
   224  
   225  	if err := r.reconcileLaunchTemplate(machinePoolScope, ec2Scope); err != nil {
   226  		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err)
   227  		machinePoolScope.Error(err, "failed to reconcile launch template")
   228  		return ctrl.Result{}, err
   229  	}
   230  
   231  	// set the LaunchTemplateReady condition
   232  	conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition)
   233  
   234  	// Initialize ASG client
   235  	asgsvc := r.getASGService(clusterScope)
   236  
   237  	// Find existing ASG
   238  	asg, err := r.findASG(machinePoolScope, asgsvc)
   239  	if err != nil {
   240  		conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGNotFoundReason, err.Error())
   241  		return ctrl.Result{}, err
   242  	}
   243  
   244  	if asg == nil {
   245  		// Create new ASG
   246  		if _, err := r.createPool(machinePoolScope, clusterScope); err != nil {
   247  			conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error())
   248  			return ctrl.Result{}, err
   249  		}
   250  		return ctrl.Result{}, nil
   251  	}
   252  
   253  	if scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
   254  		// Set MachinePool replicas to the ASG DesiredCapacity
   255  		if *machinePoolScope.MachinePool.Spec.Replicas != *asg.DesiredCapacity {
   256  			machinePoolScope.Info("Setting MachinePool replicas to ASG DesiredCapacity",
   257  				"local", machinePoolScope.MachinePool.Spec.Replicas,
   258  				"external", asg.DesiredCapacity)
   259  			machinePoolScope.MachinePool.Spec.Replicas = asg.DesiredCapacity
   260  			if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil {
   261  				return ctrl.Result{}, err
   262  			}
   263  		}
   264  	}
   265  
   266  	if err := r.updatePool(machinePoolScope, clusterScope, asg); err != nil {
   267  		machinePoolScope.Error(err, "error updating AWSMachinePool")
   268  		return ctrl.Result{}, err
   269  	}
   270  
   271  	err = r.reconcileTags(machinePoolScope, clusterScope, ec2Scope)
   272  	if err != nil {
   273  		return ctrl.Result{}, errors.Wrap(err, "error updating tags")
   274  	}
   275  
   276  	// Make sure Spec.ProviderID is always set.
   277  	machinePoolScope.AWSMachinePool.Spec.ProviderID = asg.ID
   278  	providerIDList := make([]string, len(asg.Instances))
   279  
   280  	for i, ec2 := range asg.Instances {
   281  		providerIDList[i] = fmt.Sprintf("aws:///%s/%s", ec2.AvailabilityZone, ec2.ID)
   282  	}
   283  
   284  	machinePoolScope.SetAnnotation("cluster-api-provider-aws", "true")
   285  
   286  	machinePoolScope.AWSMachinePool.Spec.ProviderIDList = providerIDList
   287  	machinePoolScope.AWSMachinePool.Status.Replicas = int32(len(providerIDList))
   288  	machinePoolScope.AWSMachinePool.Status.Ready = true
   289  	conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition)
   290  
   291  	err = machinePoolScope.UpdateInstanceStatuses(ctx, asg.Instances)
   292  	if err != nil {
   293  		machinePoolScope.Info("Failed updating instances", "instances", asg.Instances)
   294  	}
   295  
   296  	return ctrl.Result{}, nil
   297  }
   298  
   299  func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) (ctrl.Result, error) {
   300  	clusterScope.Info("Handling deleted AWSMachinePool")
   301  
   302  	ec2Svc := r.getEC2Service(ec2Scope)
   303  	asgSvc := r.getASGService(clusterScope)
   304  
   305  	asg, err := r.findASG(machinePoolScope, asgSvc)
   306  	if err != nil {
   307  		return ctrl.Result{}, err
   308  	}
   309  
   310  	if asg == nil {
   311  		machinePoolScope.V(2).Info("Unable to locate ASG")
   312  		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "NoASGFound", "Unable to find matching ASG")
   313  	} else {
   314  		machinePoolScope.SetASGStatus(asg.Status)
   315  		switch asg.Status {
   316  		case expinfrav1.ASGStatusDeleteInProgress:
   317  			// ASG is already deleting
   318  			machinePoolScope.SetNotReady()
   319  			conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGDeletionInProgress, clusterv1.ConditionSeverityWarning, "")
   320  			r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "DeletionInProgress", "ASG deletion in progress: %q", asg.Name)
   321  			machinePoolScope.Info("ASG is already deleting", "name", asg.Name)
   322  		default:
   323  			machinePoolScope.Info("Deleting ASG", "id", asg.Name, "status", asg.Status)
   324  			if err := asgSvc.DeleteASGAndWait(asg.Name); err != nil {
   325  				r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete ASG %q: %v", asg.Name, err)
   326  				return ctrl.Result{}, errors.Wrap(err, "failed to delete ASG")
   327  			}
   328  		}
   329  	}
   330  
   331  	launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID
   332  	launchTemplate, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.Name())
   333  	if err != nil {
   334  		return ctrl.Result{}, err
   335  	}
   336  
   337  	if launchTemplate == nil {
   338  		machinePoolScope.V(2).Info("Unable to locate launch template")
   339  		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "NoASGFound", "Unable to find matching ASG")
   340  		controllerutil.RemoveFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer)
   341  		return ctrl.Result{}, nil
   342  	}
   343  
   344  	machinePoolScope.Info("deleting launch template", "name", launchTemplate.Name)
   345  	if err := ec2Svc.DeleteLaunchTemplate(launchTemplateID); err != nil {
   346  		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete launch template %q: %v", launchTemplate.Name, err)
   347  		return ctrl.Result{}, errors.Wrap(err, "failed to delete ASG")
   348  	}
   349  
   350  	machinePoolScope.Info("successfully deleted AutoScalingGroup and Launch Template")
   351  
   352  	// remove finalizer
   353  	controllerutil.RemoveFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer)
   354  
   355  	return ctrl.Result{}, nil
   356  }
   357  
   358  func (r *AWSMachinePoolReconciler) updatePool(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, existingASG *expinfrav1.AutoScalingGroup) error {
   359  	if asgNeedsUpdates(machinePoolScope, existingASG) {
   360  		machinePoolScope.Info("updating AutoScalingGroup")
   361  		asgSvc := r.getASGService(clusterScope)
   362  
   363  		if err := asgSvc.UpdateASG(machinePoolScope); err != nil {
   364  			r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedUpdate", "Failed to update ASG: %v", err)
   365  			return errors.Wrap(err, "unable to update ASG")
   366  		}
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  func (r *AWSMachinePoolReconciler) createPool(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper) (*expinfrav1.AutoScalingGroup, error) {
   373  	clusterScope.Info("Initializing ASG client")
   374  
   375  	asgsvc := r.getASGService(clusterScope)
   376  
   377  	machinePoolScope.Info("Creating Autoscaling Group")
   378  	asg, err := asgsvc.CreateASG(machinePoolScope)
   379  	if err != nil {
   380  		return nil, errors.Wrapf(err, "failed to create AWSMachinePool")
   381  	}
   382  
   383  	return asg, nil
   384  }
   385  
   386  func (r *AWSMachinePoolReconciler) findASG(machinePoolScope *scope.MachinePoolScope, asgsvc services.ASGInterface) (*expinfrav1.AutoScalingGroup, error) {
   387  	// Query the instance using tags.
   388  	asg, err := asgsvc.GetASGByName(machinePoolScope)
   389  	if err != nil {
   390  		return nil, errors.Wrapf(err, "failed to query AWSMachinePool by name")
   391  	}
   392  
   393  	return asg, nil
   394  }
   395  
// reconcileLaunchTemplate ensures the EC2 launch template matches the
// AWSMachinePool spec: it creates the template when missing, backfills
// Status.LaunchTemplateID (e.g. after `clusterctl move` clears status),
// creates a new template version when spec/tags/AMI/userdata changed, and
// starts an ASG instance refresh when the change is more than userdata-only.
func (r *AWSMachinePoolReconciler) reconcileLaunchTemplate(machinePoolScope *scope.MachinePoolScope, ec2Scope scope.EC2Scope) error {
	bootstrapData, err := machinePoolScope.GetRawBootstrapData()
	if err != nil {
		// NOTE(review): the error is only surfaced as an event and reconciliation
		// continues with possibly-empty bootstrapData — confirm this best-effort
		// behavior is intended.
		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedGetBootstrapData", err.Error())
	}
	// Hash is compared against the hash stored with the launch template to
	// detect userdata-only changes.
	bootstrapDataHash := userdata.ComputeHash(bootstrapData)

	ec2svc := r.getEC2Service(ec2Scope)

	machinePoolScope.Info("checking for existing launch template")
	launchTemplate, launchTemplateUserDataHash, err := ec2svc.GetLaunchTemplate(machinePoolScope.Name())
	if err != nil {
		conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error())
		return err
	}

	imageID, err := ec2svc.DiscoverLaunchTemplateAMI(machinePoolScope)
	if err != nil {
		conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error())
		return err
	}

	if launchTemplate == nil {
		// First-time creation: persist the new template ID in status right away.
		machinePoolScope.Info("no existing launch template found, creating")
		launchTemplateID, err := ec2svc.CreateLaunchTemplate(machinePoolScope, imageID, bootstrapData)
		if err != nil {
			conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error())
			return err
		}

		machinePoolScope.SetLaunchTemplateIDStatus(launchTemplateID)
		return machinePoolScope.PatchObject()
	}

	// LaunchTemplateID is set during LaunchTemplate creation, but for a scenario such as `clusterctl move`, status fields become blank.
	// If launchTemplate already exists but LaunchTemplateID field in the status is empty, get the ID and update the status.
	if machinePoolScope.AWSMachinePool.Status.LaunchTemplateID == "" {
		launchTemplateID, err := ec2svc.GetLaunchTemplateID(machinePoolScope.Name())
		if err != nil {
			conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error())
			return err
		}
		machinePoolScope.SetLaunchTemplateIDStatus(launchTemplateID)
		return machinePoolScope.PatchObject()
	}

	// Last-applied tags are stored in an annotation so tag drift can be detected.
	annotation, err := r.machinePoolAnnotationJSON(machinePoolScope.AWSMachinePool, TagsLastAppliedAnnotation)
	if err != nil {
		return err
	}

	// Check if the instance tags were changed. If they were, create a new LaunchTemplate.
	tagsChanged, _, _, _ := tagsChanged(annotation, machinePoolScope.AdditionalTags()) // nolint:dogsled

	needsUpdate, err := ec2svc.LaunchTemplateNeedsUpdate(machinePoolScope, &machinePoolScope.AWSMachinePool.Spec.AWSLaunchTemplate, launchTemplate)
	if err != nil {
		return err
	}

	// If there is a change: before changing the template, check if there exist an ongoing instance refresh,
	// because only 1 instance refresh can be "InProgress". If template is updated when refresh cannot be started,
	// that change will not trigger a refresh. Do not start an instance refresh if only userdata changed.
	if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID {
		asgSvc := r.getASGService(ec2Scope)
		canStart, err := asgSvc.CanStartASGInstanceRefresh(machinePoolScope)
		if err != nil {
			return err
		}
		if !canStart {
			// Bail out before writing a new template version that would not be
			// rolled out; retry on a later reconcile.
			conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition, expinfrav1.InstanceRefreshNotReadyReason, clusterv1.ConditionSeverityWarning, "")
			return errors.New("Cannot start a new instance refresh. Unfinished instance refresh exist")
		}
	}

	// Create a new launch template version if there's a difference in configuration, tags,
	// userdata, OR we've discovered a new AMI ID.
	if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID || launchTemplateUserDataHash != bootstrapDataHash {
		machinePoolScope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", machinePoolScope.AWSMachinePool.Spec.AWSLaunchTemplate)
		// There is a limit to the number of Launch Template Versions.
		// We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use.
		if err := ec2svc.PruneLaunchTemplateVersions(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID); err != nil {
			return err
		}
		if err := ec2svc.CreateLaunchTemplateVersion(machinePoolScope, imageID, bootstrapData); err != nil {
			return err
		}
	}

	// After creating a new version of launch template, instance refresh is required
	// to trigger a rolling replacement of all previously launched instances.
	// If ONLY the userdata changed, previously launched instances continue to use the old launch
	// template.
	//
	// FIXME(dlipovetsky,sedefsavas): If the controller terminates, or the StartASGInstanceRefresh returns an error,
	// this conditional will not evaluate to true the next reconcile. If any machines use an older
	// Launch Template version, and the difference between the older and current versions is _more_
	// than userdata, we should start an Instance Refresh.
	if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID {
		machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas)
		asgSvc := r.getASGService(ec2Scope)
		if err := asgSvc.StartASGInstanceRefresh(machinePoolScope); err != nil {
			conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition, expinfrav1.InstanceRefreshFailedReason, clusterv1.ConditionSeverityError, err.Error())
			return err
		}
		conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition)
	}

	return nil
}
   505  
   506  func (r *AWSMachinePoolReconciler) reconcileTags(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error {
   507  	ec2Svc := r.getEC2Service(ec2Scope)
   508  	asgSvc := r.getASGService(clusterScope)
   509  
   510  	launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID
   511  	asgName := machinePoolScope.Name()
   512  	additionalTags := machinePoolScope.AdditionalTags()
   513  
   514  	tagsChanged, err := r.ensureTags(ec2Svc, asgSvc, machinePoolScope.AWSMachinePool, &launchTemplateID, &asgName, additionalTags)
   515  	if err != nil {
   516  		return err
   517  	}
   518  	if tagsChanged {
   519  		r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "UpdatedTags", "updated tags on resources")
   520  	}
   521  	return nil
   522  }
   523  
   524  // asgNeedsUpdates compares incoming AWSMachinePool and compares against existing ASG.
   525  func asgNeedsUpdates(machinePoolScope *scope.MachinePoolScope, existingASG *expinfrav1.AutoScalingGroup) bool {
   526  	if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) {
   527  		if machinePoolScope.MachinePool.Spec.Replicas != nil {
   528  			if existingASG.DesiredCapacity == nil || *machinePoolScope.MachinePool.Spec.Replicas != *existingASG.DesiredCapacity {
   529  				return true
   530  			}
   531  		} else if existingASG.DesiredCapacity != nil {
   532  			return true
   533  		}
   534  	}
   535  
   536  	if machinePoolScope.AWSMachinePool.Spec.MaxSize != existingASG.MaxSize {
   537  		return true
   538  	}
   539  
   540  	if machinePoolScope.AWSMachinePool.Spec.MinSize != existingASG.MinSize {
   541  		return true
   542  	}
   543  
   544  	if machinePoolScope.AWSMachinePool.Spec.CapacityRebalance != existingASG.CapacityRebalance {
   545  		return true
   546  	}
   547  
   548  	if !cmp.Equal(machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy, existingASG.MixedInstancesPolicy) {
   549  		machinePoolScope.Info("got a mixed diff here", "incoming", machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy, "existing", existingASG.MixedInstancesPolicy)
   550  		return true
   551  	}
   552  
   553  	// todo subnet diff
   554  
   555  	return false
   556  }
   557  
   558  // getOwnerMachinePool returns the MachinePool object owning the current resource.
   559  func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) {
   560  	for _, ref := range obj.OwnerReferences {
   561  		if ref.Kind != "MachinePool" {
   562  			continue
   563  		}
   564  		gv, err := schema.ParseGroupVersion(ref.APIVersion)
   565  		if err != nil {
   566  			return nil, errors.WithStack(err)
   567  		}
   568  		if gv.Group == expclusterv1.GroupVersion.Group {
   569  			return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name)
   570  		}
   571  	}
   572  	return nil, nil
   573  }
   574  
   575  // getMachinePoolByName finds and return a Machine object using the specified params.
   576  func getMachinePoolByName(ctx context.Context, c client.Client, namespace, name string) (*expclusterv1.MachinePool, error) {
   577  	m := &expclusterv1.MachinePool{}
   578  	key := client.ObjectKey{Name: name, Namespace: namespace}
   579  	if err := c.Get(ctx, key, m); err != nil {
   580  		return nil, err
   581  	}
   582  	return m, nil
   583  }
   584  
   585  func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.MapFunc {
   586  	return func(o client.Object) []reconcile.Request {
   587  		m, ok := o.(*expclusterv1.MachinePool)
   588  		if !ok {
   589  			panic(fmt.Sprintf("Expected a MachinePool but got a %T", o))
   590  		}
   591  
   592  		gk := gvk.GroupKind()
   593  		// Return early if the GroupKind doesn't match what we expect
   594  		infraGK := m.Spec.Template.Spec.InfrastructureRef.GroupVersionKind().GroupKind()
   595  		if gk != infraGK {
   596  			return nil
   597  		}
   598  
   599  		return []reconcile.Request{
   600  			{
   601  				NamespacedName: client.ObjectKey{
   602  					Namespace: m.Namespace,
   603  					Name:      m.Spec.Template.Spec.InfrastructureRef.Name,
   604  				},
   605  			},
   606  		}
   607  	}
   608  }
   609  
   610  func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log logr.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, error) {
   611  	var clusterScope *scope.ClusterScope
   612  	var managedControlPlaneScope *scope.ManagedControlPlaneScope
   613  	var err error
   614  
   615  	if cluster.Spec.ControlPlaneRef != nil && cluster.Spec.ControlPlaneRef.Kind == controllers.AWSManagedControlPlaneRefKind {
   616  		controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{}
   617  		controlPlaneName := client.ObjectKey{
   618  			Namespace: awsMachinePool.Namespace,
   619  			Name:      cluster.Spec.ControlPlaneRef.Name,
   620  		}
   621  
   622  		if err := r.Get(ctx, controlPlaneName, controlPlane); err != nil {
   623  			// AWSManagedControlPlane is not ready
   624  			return nil, nil // nolint:nilerr
   625  		}
   626  
   627  		managedControlPlaneScope, err = scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{
   628  			Client:         r.Client,
   629  			Logger:         &log,
   630  			Cluster:        cluster,
   631  			ControlPlane:   controlPlane,
   632  			ControllerName: "awsManagedControlPlane",
   633  		})
   634  		if err != nil {
   635  			return nil, err
   636  		}
   637  
   638  		return managedControlPlaneScope, nil
   639  	}
   640  
   641  	awsCluster := &infrav1.AWSCluster{}
   642  
   643  	infraClusterName := client.ObjectKey{
   644  		Namespace: awsMachinePool.Namespace,
   645  		Name:      cluster.Spec.InfrastructureRef.Name,
   646  	}
   647  
   648  	if err := r.Client.Get(ctx, infraClusterName, awsCluster); err != nil {
   649  		// AWSCluster is not ready
   650  		return nil, nil // nolint:nilerr
   651  	}
   652  
   653  	// Create the cluster scope
   654  	clusterScope, err = scope.NewClusterScope(scope.ClusterScopeParams{
   655  		Client:         r.Client,
   656  		Logger:         &log,
   657  		Cluster:        cluster,
   658  		AWSCluster:     awsCluster,
   659  		ControllerName: "awsmachine",
   660  	})
   661  	if err != nil {
   662  		return nil, err
   663  	}
   664  
   665  	return clusterScope, nil
   666  }