/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"

	"github.com/go-logr/logr"
	"github.com/google/go-cmp/cmp"
	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/client-go/tools/record"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
	"sigs.k8s.io/controller-runtime/pkg/source"

	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-aws/controllers"
	ekscontrolplanev1 "sigs.k8s.io/cluster-api-provider-aws/controlplane/eks/api/v1beta1"
	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud"
	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope"
	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services"
	asg "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/autoscaling"
	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/ec2"
	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/userdata"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/predicates"
)

// AWSMachinePoolReconciler reconciles a AWSMachinePool object.
type AWSMachinePoolReconciler struct {
	client.Client
	// Recorder emits Kubernetes events for the reconciled AWSMachinePool objects.
	Recorder record.EventRecorder
	// WatchFilterValue is handed to the pause/label-filter predicate in
	// SetupWithManager to restrict which resources this controller acts on.
	WatchFilterValue string
	// asgServiceFactory optionally overrides the AutoScaling service
	// constructor; when nil, getASGService falls back to asg.NewService.
	asgServiceFactory func(cloud.ClusterScoper) services.ASGInterface
	// ec2ServiceFactory optionally overrides the EC2 service constructor;
	// when nil, getEC2Service falls back to ec2.NewService.
	ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface
}

// getASGService returns the AutoScaling service for the given scope, preferring
// the injected factory (e.g. a fake in tests) over the default implementation.
func (r *AWSMachinePoolReconciler) getASGService(scope cloud.ClusterScoper) services.ASGInterface {
	if r.asgServiceFactory != nil {
		return r.asgServiceFactory(scope)
	}
	return asg.NewService(scope)
}

// getEC2Service returns the EC2 service for the given scope, preferring the
// injected factory over the default implementation.
func (r *AWSMachinePoolReconciler) getEC2Service(scope scope.EC2Scope) services.EC2Interface {
	if r.ec2ServiceFactory != nil {
		return r.ec2ServiceFactory(scope)
	}

	return ec2.NewService(scope)
}

// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch
// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
// +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch

// Reconcile is the reconciliation loop for AWSMachinePool.
88 func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { 89 log := ctrl.LoggerFrom(ctx) 90 91 // Fetch the AWSMachinePool . 92 awsMachinePool := &expinfrav1.AWSMachinePool{} 93 err := r.Get(ctx, req.NamespacedName, awsMachinePool) 94 if err != nil { 95 if apierrors.IsNotFound(err) { 96 return ctrl.Result{}, nil 97 } 98 return ctrl.Result{}, err 99 } 100 101 // Fetch the CAPI MachinePool 102 machinePool, err := getOwnerMachinePool(ctx, r.Client, awsMachinePool.ObjectMeta) 103 if err != nil { 104 return reconcile.Result{}, err 105 } 106 if machinePool == nil { 107 log.Info("MachinePool Controller has not yet set OwnerRef") 108 return reconcile.Result{}, nil 109 } 110 log = log.WithValues("machinePool", machinePool.Name) 111 112 // Fetch the Cluster. 113 cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta) 114 if err != nil { 115 log.Info("MachinePool is missing cluster label or cluster does not exist") 116 return reconcile.Result{}, nil 117 } 118 119 log = log.WithValues("cluster", cluster.Name) 120 121 infraCluster, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) 122 if err != nil { 123 return ctrl.Result{}, errors.New("error getting infra provider cluster or control plane object") 124 } 125 if infraCluster == nil { 126 log.Info("AWSCluster or AWSManagedControlPlane is not ready yet") 127 return ctrl.Result{}, nil 128 } 129 130 // Create the machine pool scope 131 machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{ 132 Client: r.Client, 133 Cluster: cluster, 134 MachinePool: machinePool, 135 InfraCluster: infraCluster, 136 AWSMachinePool: awsMachinePool, 137 }) 138 if err != nil { 139 log.Error(err, "failed to create scope") 140 return ctrl.Result{}, err 141 } 142 143 // Always close the scope when exiting this function so we can persist any AWSMachine changes. 
144 defer func() { 145 // set Ready condition before AWSMachinePool is patched 146 conditions.SetSummary(machinePoolScope.AWSMachinePool, 147 conditions.WithConditions( 148 expinfrav1.ASGReadyCondition, 149 expinfrav1.LaunchTemplateReadyCondition, 150 ), 151 conditions.WithStepCounterIfOnly( 152 expinfrav1.ASGReadyCondition, 153 expinfrav1.LaunchTemplateReadyCondition, 154 ), 155 ) 156 157 if err := machinePoolScope.Close(); err != nil && reterr == nil { 158 reterr = err 159 } 160 }() 161 162 switch infraScope := infraCluster.(type) { 163 case *scope.ManagedControlPlaneScope: 164 if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { 165 return r.reconcileDelete(machinePoolScope, infraScope, infraScope) 166 } 167 168 return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) 169 case *scope.ClusterScope: 170 if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { 171 return r.reconcileDelete(machinePoolScope, infraScope, infraScope) 172 } 173 174 return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) 175 default: 176 return ctrl.Result{}, errors.New("infraCluster has unknown type") 177 } 178 } 179 180 func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { 181 return ctrl.NewControllerManagedBy(mgr). 182 WithOptions(options). 183 For(&expinfrav1.AWSMachinePool{}). 184 Watches( 185 &source.Kind{Type: &expclusterv1.MachinePool{}}, 186 handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(expinfrav1.GroupVersion.WithKind("AWSMachinePool"))), 187 ). 188 WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)). 
189 Complete(r) 190 } 191 192 func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) (ctrl.Result, error) { 193 clusterScope.Info("Reconciling AWSMachinePool") 194 195 // If the AWSMachine is in an error state, return early. 196 if machinePoolScope.HasFailed() { 197 machinePoolScope.Info("Error state detected, skipping reconciliation") 198 199 // TODO: If we are in a failed state, delete the secret regardless of instance state 200 201 return ctrl.Result{}, nil 202 } 203 204 // If the AWSMachinepool doesn't have our finalizer, add it 205 controllerutil.AddFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) 206 207 // Register finalizer immediately to avoid orphaning AWS resources 208 if err := machinePoolScope.PatchObject(); err != nil { 209 return ctrl.Result{}, err 210 } 211 212 if !machinePoolScope.Cluster.Status.InfrastructureReady { 213 machinePoolScope.Info("Cluster infrastructure is not ready yet") 214 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") 215 return ctrl.Result{}, nil 216 } 217 218 // Make sure bootstrap data is available and populated 219 if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { 220 machinePoolScope.Info("Bootstrap data secret reference is not yet available") 221 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") 222 return ctrl.Result{}, nil 223 } 224 225 if err := r.reconcileLaunchTemplate(machinePoolScope, ec2Scope); err != nil { 226 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) 227 machinePoolScope.Error(err, "failed to 
reconcile launch template") 228 return ctrl.Result{}, err 229 } 230 231 // set the LaunchTemplateReady condition 232 conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition) 233 234 // Initialize ASG client 235 asgsvc := r.getASGService(clusterScope) 236 237 // Find existing ASG 238 asg, err := r.findASG(machinePoolScope, asgsvc) 239 if err != nil { 240 conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGNotFoundReason, err.Error()) 241 return ctrl.Result{}, err 242 } 243 244 if asg == nil { 245 // Create new ASG 246 if _, err := r.createPool(machinePoolScope, clusterScope); err != nil { 247 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) 248 return ctrl.Result{}, err 249 } 250 return ctrl.Result{}, nil 251 } 252 253 if scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) { 254 // Set MachinePool replicas to the ASG DesiredCapacity 255 if *machinePoolScope.MachinePool.Spec.Replicas != *asg.DesiredCapacity { 256 machinePoolScope.Info("Setting MachinePool replicas to ASG DesiredCapacity", 257 "local", machinePoolScope.MachinePool.Spec.Replicas, 258 "external", asg.DesiredCapacity) 259 machinePoolScope.MachinePool.Spec.Replicas = asg.DesiredCapacity 260 if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil { 261 return ctrl.Result{}, err 262 } 263 } 264 } 265 266 if err := r.updatePool(machinePoolScope, clusterScope, asg); err != nil { 267 machinePoolScope.Error(err, "error updating AWSMachinePool") 268 return ctrl.Result{}, err 269 } 270 271 err = r.reconcileTags(machinePoolScope, clusterScope, ec2Scope) 272 if err != nil { 273 return ctrl.Result{}, errors.Wrap(err, "error updating tags") 274 } 275 276 // Make sure Spec.ProviderID is always set. 
277 machinePoolScope.AWSMachinePool.Spec.ProviderID = asg.ID 278 providerIDList := make([]string, len(asg.Instances)) 279 280 for i, ec2 := range asg.Instances { 281 providerIDList[i] = fmt.Sprintf("aws:///%s/%s", ec2.AvailabilityZone, ec2.ID) 282 } 283 284 machinePoolScope.SetAnnotation("cluster-api-provider-aws", "true") 285 286 machinePoolScope.AWSMachinePool.Spec.ProviderIDList = providerIDList 287 machinePoolScope.AWSMachinePool.Status.Replicas = int32(len(providerIDList)) 288 machinePoolScope.AWSMachinePool.Status.Ready = true 289 conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition) 290 291 err = machinePoolScope.UpdateInstanceStatuses(ctx, asg.Instances) 292 if err != nil { 293 machinePoolScope.Info("Failed updating instances", "instances", asg.Instances) 294 } 295 296 return ctrl.Result{}, nil 297 } 298 299 func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) (ctrl.Result, error) { 300 clusterScope.Info("Handling deleted AWSMachinePool") 301 302 ec2Svc := r.getEC2Service(ec2Scope) 303 asgSvc := r.getASGService(clusterScope) 304 305 asg, err := r.findASG(machinePoolScope, asgSvc) 306 if err != nil { 307 return ctrl.Result{}, err 308 } 309 310 if asg == nil { 311 machinePoolScope.V(2).Info("Unable to locate ASG") 312 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "NoASGFound", "Unable to find matching ASG") 313 } else { 314 machinePoolScope.SetASGStatus(asg.Status) 315 switch asg.Status { 316 case expinfrav1.ASGStatusDeleteInProgress: 317 // ASG is already deleting 318 machinePoolScope.SetNotReady() 319 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGDeletionInProgress, clusterv1.ConditionSeverityWarning, "") 320 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "DeletionInProgress", "ASG deletion in progress: %q", asg.Name) 321 
machinePoolScope.Info("ASG is already deleting", "name", asg.Name) 322 default: 323 machinePoolScope.Info("Deleting ASG", "id", asg.Name, "status", asg.Status) 324 if err := asgSvc.DeleteASGAndWait(asg.Name); err != nil { 325 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete ASG %q: %v", asg.Name, err) 326 return ctrl.Result{}, errors.Wrap(err, "failed to delete ASG") 327 } 328 } 329 } 330 331 launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID 332 launchTemplate, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.Name()) 333 if err != nil { 334 return ctrl.Result{}, err 335 } 336 337 if launchTemplate == nil { 338 machinePoolScope.V(2).Info("Unable to locate launch template") 339 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "NoASGFound", "Unable to find matching ASG") 340 controllerutil.RemoveFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) 341 return ctrl.Result{}, nil 342 } 343 344 machinePoolScope.Info("deleting launch template", "name", launchTemplate.Name) 345 if err := ec2Svc.DeleteLaunchTemplate(launchTemplateID); err != nil { 346 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete launch template %q: %v", launchTemplate.Name, err) 347 return ctrl.Result{}, errors.Wrap(err, "failed to delete ASG") 348 } 349 350 machinePoolScope.Info("successfully deleted AutoScalingGroup and Launch Template") 351 352 // remove finalizer 353 controllerutil.RemoveFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) 354 355 return ctrl.Result{}, nil 356 } 357 358 func (r *AWSMachinePoolReconciler) updatePool(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, existingASG *expinfrav1.AutoScalingGroup) error { 359 if asgNeedsUpdates(machinePoolScope, existingASG) { 360 machinePoolScope.Info("updating AutoScalingGroup") 361 asgSvc := 
r.getASGService(clusterScope) 362 363 if err := asgSvc.UpdateASG(machinePoolScope); err != nil { 364 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedUpdate", "Failed to update ASG: %v", err) 365 return errors.Wrap(err, "unable to update ASG") 366 } 367 } 368 369 return nil 370 } 371 372 func (r *AWSMachinePoolReconciler) createPool(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper) (*expinfrav1.AutoScalingGroup, error) { 373 clusterScope.Info("Initializing ASG client") 374 375 asgsvc := r.getASGService(clusterScope) 376 377 machinePoolScope.Info("Creating Autoscaling Group") 378 asg, err := asgsvc.CreateASG(machinePoolScope) 379 if err != nil { 380 return nil, errors.Wrapf(err, "failed to create AWSMachinePool") 381 } 382 383 return asg, nil 384 } 385 386 func (r *AWSMachinePoolReconciler) findASG(machinePoolScope *scope.MachinePoolScope, asgsvc services.ASGInterface) (*expinfrav1.AutoScalingGroup, error) { 387 // Query the instance using tags. 
388 asg, err := asgsvc.GetASGByName(machinePoolScope) 389 if err != nil { 390 return nil, errors.Wrapf(err, "failed to query AWSMachinePool by name") 391 } 392 393 return asg, nil 394 } 395 396 func (r *AWSMachinePoolReconciler) reconcileLaunchTemplate(machinePoolScope *scope.MachinePoolScope, ec2Scope scope.EC2Scope) error { 397 bootstrapData, err := machinePoolScope.GetRawBootstrapData() 398 if err != nil { 399 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedGetBootstrapData", err.Error()) 400 } 401 bootstrapDataHash := userdata.ComputeHash(bootstrapData) 402 403 ec2svc := r.getEC2Service(ec2Scope) 404 405 machinePoolScope.Info("checking for existing launch template") 406 launchTemplate, launchTemplateUserDataHash, err := ec2svc.GetLaunchTemplate(machinePoolScope.Name()) 407 if err != nil { 408 conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) 409 return err 410 } 411 412 imageID, err := ec2svc.DiscoverLaunchTemplateAMI(machinePoolScope) 413 if err != nil { 414 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) 415 return err 416 } 417 418 if launchTemplate == nil { 419 machinePoolScope.Info("no existing launch template found, creating") 420 launchTemplateID, err := ec2svc.CreateLaunchTemplate(machinePoolScope, imageID, bootstrapData) 421 if err != nil { 422 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) 423 return err 424 } 425 426 machinePoolScope.SetLaunchTemplateIDStatus(launchTemplateID) 427 return machinePoolScope.PatchObject() 428 } 429 430 // LaunchTemplateID is set during LaunchTemplate creation, but for a scenario such as `clusterctl move`, status 
fields become blank. 431 // If launchTemplate already exists but LaunchTemplateID field in the status is empty, get the ID and update the status. 432 if machinePoolScope.AWSMachinePool.Status.LaunchTemplateID == "" { 433 launchTemplateID, err := ec2svc.GetLaunchTemplateID(machinePoolScope.Name()) 434 if err != nil { 435 conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) 436 return err 437 } 438 machinePoolScope.SetLaunchTemplateIDStatus(launchTemplateID) 439 return machinePoolScope.PatchObject() 440 } 441 442 annotation, err := r.machinePoolAnnotationJSON(machinePoolScope.AWSMachinePool, TagsLastAppliedAnnotation) 443 if err != nil { 444 return err 445 } 446 447 // Check if the instance tags were changed. If they were, create a new LaunchTemplate. 448 tagsChanged, _, _, _ := tagsChanged(annotation, machinePoolScope.AdditionalTags()) // nolint:dogsled 449 450 needsUpdate, err := ec2svc.LaunchTemplateNeedsUpdate(machinePoolScope, &machinePoolScope.AWSMachinePool.Spec.AWSLaunchTemplate, launchTemplate) 451 if err != nil { 452 return err 453 } 454 455 // If there is a change: before changing the template, check if there exist an ongoing instance refresh, 456 // because only 1 instance refresh can be "InProgress". If template is updated when refresh cannot be started, 457 // that change will not trigger a refresh. Do not start an instance refresh if only userdata changed. 458 if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID { 459 asgSvc := r.getASGService(ec2Scope) 460 canStart, err := asgSvc.CanStartASGInstanceRefresh(machinePoolScope) 461 if err != nil { 462 return err 463 } 464 if !canStart { 465 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition, expinfrav1.InstanceRefreshNotReadyReason, clusterv1.ConditionSeverityWarning, "") 466 return errors.New("Cannot start a new instance refresh. 
Unfinished instance refresh exist") 467 } 468 } 469 470 // Create a new launch template version if there's a difference in configuration, tags, 471 // userdata, OR we've discovered a new AMI ID. 472 if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID || launchTemplateUserDataHash != bootstrapDataHash { 473 machinePoolScope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", machinePoolScope.AWSMachinePool.Spec.AWSLaunchTemplate) 474 // There is a limit to the number of Launch Template Versions. 475 // We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use. 476 if err := ec2svc.PruneLaunchTemplateVersions(machinePoolScope.AWSMachinePool.Status.LaunchTemplateID); err != nil { 477 return err 478 } 479 if err := ec2svc.CreateLaunchTemplateVersion(machinePoolScope, imageID, bootstrapData); err != nil { 480 return err 481 } 482 } 483 484 // After creating a new version of launch template, instance refresh is required 485 // to trigger a rolling replacement of all previously launched instances. 486 // If ONLY the userdata changed, previously launched instances continue to use the old launch 487 // template. 488 // 489 // FIXME(dlipovetsky,sedefsavas): If the controller terminates, or the StartASGInstanceRefresh returns an error, 490 // this conditional will not evaluate to true the next reconcile. If any machines use an older 491 // Launch Template version, and the difference between the older and current versions is _more_ 492 // than userdata, we should start an Instance Refresh. 
493 if needsUpdate || tagsChanged || *imageID != *launchTemplate.AMI.ID { 494 machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas) 495 asgSvc := r.getASGService(ec2Scope) 496 if err := asgSvc.StartASGInstanceRefresh(machinePoolScope); err != nil { 497 conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition, expinfrav1.InstanceRefreshFailedReason, clusterv1.ConditionSeverityError, err.Error()) 498 return err 499 } 500 conditions.MarkTrue(machinePoolScope.AWSMachinePool, expinfrav1.InstanceRefreshStartedCondition) 501 } 502 503 return nil 504 } 505 506 func (r *AWSMachinePoolReconciler) reconcileTags(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { 507 ec2Svc := r.getEC2Service(ec2Scope) 508 asgSvc := r.getASGService(clusterScope) 509 510 launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID 511 asgName := machinePoolScope.Name() 512 additionalTags := machinePoolScope.AdditionalTags() 513 514 tagsChanged, err := r.ensureTags(ec2Svc, asgSvc, machinePoolScope.AWSMachinePool, &launchTemplateID, &asgName, additionalTags) 515 if err != nil { 516 return err 517 } 518 if tagsChanged { 519 r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeNormal, "UpdatedTags", "updated tags on resources") 520 } 521 return nil 522 } 523 524 // asgNeedsUpdates compares incoming AWSMachinePool and compares against existing ASG. 
525 func asgNeedsUpdates(machinePoolScope *scope.MachinePoolScope, existingASG *expinfrav1.AutoScalingGroup) bool { 526 if !scope.ReplicasExternallyManaged(machinePoolScope.MachinePool) { 527 if machinePoolScope.MachinePool.Spec.Replicas != nil { 528 if existingASG.DesiredCapacity == nil || *machinePoolScope.MachinePool.Spec.Replicas != *existingASG.DesiredCapacity { 529 return true 530 } 531 } else if existingASG.DesiredCapacity != nil { 532 return true 533 } 534 } 535 536 if machinePoolScope.AWSMachinePool.Spec.MaxSize != existingASG.MaxSize { 537 return true 538 } 539 540 if machinePoolScope.AWSMachinePool.Spec.MinSize != existingASG.MinSize { 541 return true 542 } 543 544 if machinePoolScope.AWSMachinePool.Spec.CapacityRebalance != existingASG.CapacityRebalance { 545 return true 546 } 547 548 if !cmp.Equal(machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy, existingASG.MixedInstancesPolicy) { 549 machinePoolScope.Info("got a mixed diff here", "incoming", machinePoolScope.AWSMachinePool.Spec.MixedInstancesPolicy, "existing", existingASG.MixedInstancesPolicy) 550 return true 551 } 552 553 // todo subnet diff 554 555 return false 556 } 557 558 // getOwnerMachinePool returns the MachinePool object owning the current resource. 559 func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) { 560 for _, ref := range obj.OwnerReferences { 561 if ref.Kind != "MachinePool" { 562 continue 563 } 564 gv, err := schema.ParseGroupVersion(ref.APIVersion) 565 if err != nil { 566 return nil, errors.WithStack(err) 567 } 568 if gv.Group == expclusterv1.GroupVersion.Group { 569 return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name) 570 } 571 } 572 return nil, nil 573 } 574 575 // getMachinePoolByName finds and return a Machine object using the specified params. 
576 func getMachinePoolByName(ctx context.Context, c client.Client, namespace, name string) (*expclusterv1.MachinePool, error) { 577 m := &expclusterv1.MachinePool{} 578 key := client.ObjectKey{Name: name, Namespace: namespace} 579 if err := c.Get(ctx, key, m); err != nil { 580 return nil, err 581 } 582 return m, nil 583 } 584 585 func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.MapFunc { 586 return func(o client.Object) []reconcile.Request { 587 m, ok := o.(*expclusterv1.MachinePool) 588 if !ok { 589 panic(fmt.Sprintf("Expected a MachinePool but got a %T", o)) 590 } 591 592 gk := gvk.GroupKind() 593 // Return early if the GroupKind doesn't match what we expect 594 infraGK := m.Spec.Template.Spec.InfrastructureRef.GroupVersionKind().GroupKind() 595 if gk != infraGK { 596 return nil 597 } 598 599 return []reconcile.Request{ 600 { 601 NamespacedName: client.ObjectKey{ 602 Namespace: m.Namespace, 603 Name: m.Spec.Template.Spec.InfrastructureRef.Name, 604 }, 605 }, 606 } 607 } 608 } 609 610 func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log logr.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, error) { 611 var clusterScope *scope.ClusterScope 612 var managedControlPlaneScope *scope.ManagedControlPlaneScope 613 var err error 614 615 if cluster.Spec.ControlPlaneRef != nil && cluster.Spec.ControlPlaneRef.Kind == controllers.AWSManagedControlPlaneRefKind { 616 controlPlane := &ekscontrolplanev1.AWSManagedControlPlane{} 617 controlPlaneName := client.ObjectKey{ 618 Namespace: awsMachinePool.Namespace, 619 Name: cluster.Spec.ControlPlaneRef.Name, 620 } 621 622 if err := r.Get(ctx, controlPlaneName, controlPlane); err != nil { 623 // AWSManagedControlPlane is not ready 624 return nil, nil // nolint:nilerr 625 } 626 627 managedControlPlaneScope, err = scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{ 628 Client: r.Client, 629 Logger: &log, 630 Cluster: 
cluster, 631 ControlPlane: controlPlane, 632 ControllerName: "awsManagedControlPlane", 633 }) 634 if err != nil { 635 return nil, err 636 } 637 638 return managedControlPlaneScope, nil 639 } 640 641 awsCluster := &infrav1.AWSCluster{} 642 643 infraClusterName := client.ObjectKey{ 644 Namespace: awsMachinePool.Namespace, 645 Name: cluster.Spec.InfrastructureRef.Name, 646 } 647 648 if err := r.Client.Get(ctx, infraClusterName, awsCluster); err != nil { 649 // AWSCluster is not ready 650 return nil, nil // nolint:nilerr 651 } 652 653 // Create the cluster scope 654 clusterScope, err = scope.NewClusterScope(scope.ClusterScopeParams{ 655 Client: r.Client, 656 Logger: &log, 657 Cluster: cluster, 658 AWSCluster: awsCluster, 659 ControllerName: "awsmachine", 660 }) 661 if err != nil { 662 return nil, err 663 } 664 665 return clusterScope, nil 666 }