sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/machineset/machineset_controller.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package machineset
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/pkg/errors"
    26  	corev1 "k8s.io/api/core/v1"
    27  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    32  	"k8s.io/apimachinery/pkg/util/wait"
    33  	"k8s.io/apiserver/pkg/storage/names"
    34  	"k8s.io/client-go/tools/record"
    35  	"k8s.io/klog/v2"
    36  	ctrl "sigs.k8s.io/controller-runtime"
    37  	"sigs.k8s.io/controller-runtime/pkg/builder"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  	"sigs.k8s.io/controller-runtime/pkg/controller"
    40  	"sigs.k8s.io/controller-runtime/pkg/handler"
    41  
    42  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    43  	"sigs.k8s.io/cluster-api/controllers/external"
    44  	"sigs.k8s.io/cluster-api/controllers/noderefutil"
    45  	"sigs.k8s.io/cluster-api/controllers/remote"
    46  	"sigs.k8s.io/cluster-api/internal/contract"
    47  	"sigs.k8s.io/cluster-api/internal/controllers/machine"
    48  	"sigs.k8s.io/cluster-api/internal/util/ssa"
    49  	"sigs.k8s.io/cluster-api/util"
    50  	"sigs.k8s.io/cluster-api/util/annotations"
    51  	"sigs.k8s.io/cluster-api/util/collections"
    52  	"sigs.k8s.io/cluster-api/util/conditions"
    53  	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
    54  	"sigs.k8s.io/cluster-api/util/labels/format"
    55  	clog "sigs.k8s.io/cluster-api/util/log"
    56  	"sigs.k8s.io/cluster-api/util/patch"
    57  	"sigs.k8s.io/cluster-api/util/predicates"
    58  )
    59  
    60  var (
    61  	// machineSetKind contains the schema.GroupVersionKind for the MachineSet type.
    62  	machineSetKind = clusterv1.GroupVersion.WithKind("MachineSet")
    63  
    64  	// stateConfirmationTimeout is the amount of time allowed to wait for desired state.
    65  	stateConfirmationTimeout = 10 * time.Second
    66  
    67  	// stateConfirmationInterval is the amount of time between polling for the desired state.
    68  	// The polling is against a local memory cache.
    69  	stateConfirmationInterval = 100 * time.Millisecond
    70  )
    71  
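        // machineSetManagerName is the field manager name used by this controller for Server-Side-Apply
        // when applying Machines, InfrastructureMachines and BootstrapConfigs.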
    72  const machineSetManagerName = "capi-machineset"
    73  
    74  // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
    75  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
    76  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
    77  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
    78  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets;machinesets/status;machinesets/finalizers,verbs=get;list;watch;create;update;patch;delete
    79  
    80  // Reconciler reconciles a MachineSet object.
    81  type Reconciler struct {
    82  	Client                    client.Client
    83  	UnstructuredCachingClient client.Client
    84  	APIReader                 client.Reader
    85  	Tracker                   *remote.ClusterCacheTracker
    86  
    87  	// WatchFilterValue is the label value used to filter events prior to reconciliation.
    88  	WatchFilterValue string
    89  
    90  	ssaCache ssa.Cache
    91  	recorder record.EventRecorder
    92  }
    93  
    94  func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    95  	clusterToMachineSets, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineSetList{}, mgr.GetScheme())
    96  	if err != nil {
    97  		return err
    98  	}
    99  
   100  	err = ctrl.NewControllerManagedBy(mgr).
   101  		For(&clusterv1.MachineSet{}).
   102  		Owns(&clusterv1.Machine{}).
   103  		Watches(
   104  			&clusterv1.Machine{},
   105  			handler.EnqueueRequestsFromMapFunc(r.MachineToMachineSets),
   106  		).
   107  		WithOptions(options).
   108  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
   109  		Watches(
   110  			&clusterv1.Cluster{},
   111  			handler.EnqueueRequestsFromMapFunc(clusterToMachineSets),
   112  			builder.WithPredicates(
   113  				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
   114  				predicates.All(ctrl.LoggerFrom(ctx),
   115  					predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
   116  					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
   117  				),
   118  			),
   119  		).Complete(r)
   120  	if err != nil {
   121  		return errors.Wrap(err, "failed setting up with a controller manager")
   122  	}
   123  
   124  	r.recorder = mgr.GetEventRecorderFor("machineset-controller")
   125  	r.ssaCache = ssa.NewCache()
   126  	return nil
   127  }
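        // A minimal wiring sketch (illustrative only, not part of the upstream file; the tracker
        // construction, context and concurrency value are assumptions):
        //
        //	r := &Reconciler{
        //		Client:                    mgr.GetClient(),
        //		UnstructuredCachingClient: mgr.GetClient(),
        //		APIReader:                 mgr.GetAPIReader(),
        //		Tracker:                   tracker, // *remote.ClusterCacheTracker built elsewhere
        //	}
        //	if err := r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil {
        //		// handle setup error
        //	}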
   128  
   129  func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   130  	machineSet := &clusterv1.MachineSet{}
   131  	if err := r.Client.Get(ctx, req.NamespacedName, machineSet); err != nil {
   132  		if apierrors.IsNotFound(err) {
   133  			// Object not found, return. Created objects are automatically garbage collected.
   134  			// For additional cleanup logic use finalizers.
   135  			return ctrl.Result{}, nil
   136  		}
   137  		// Error reading the object - requeue the request.
   138  		return ctrl.Result{}, err
   139  	}
   140  
   141  	// AddOwners adds the owners of MachineSet as k/v pairs to the logger.
   142  	// Specifically, it will add MachineDeployment.
   143  	ctx, log, err := clog.AddOwners(ctx, r.Client, machineSet)
   144  	if err != nil {
   145  		return ctrl.Result{}, err
   146  	}
   147  
   148  	log = log.WithValues("Cluster", klog.KRef(machineSet.ObjectMeta.Namespace, machineSet.Spec.ClusterName))
   149  	ctx = ctrl.LoggerInto(ctx, log)
   150  
   151  	cluster, err := util.GetClusterByName(ctx, r.Client, machineSet.ObjectMeta.Namespace, machineSet.Spec.ClusterName)
   152  	if err != nil {
   153  		return ctrl.Result{}, err
   154  	}
   155  
   156  	// Return early if the object or Cluster is paused.
   157  	if annotations.IsPaused(cluster, machineSet) {
   158  		log.Info("Reconciliation is paused for this object")
   159  		return ctrl.Result{}, nil
   160  	}
   161  
   162  	// Initialize the patch helper
   163  	patchHelper, err := patch.NewHelper(machineSet, r.Client)
   164  	if err != nil {
   165  		return ctrl.Result{}, err
   166  	}
   167  
   168  	defer func() {
   169  		// Always attempt to patch the object and status after each reconciliation.
   170  		if err := patchMachineSet(ctx, patchHelper, machineSet); err != nil {
   171  			reterr = kerrors.NewAggregate([]error{reterr, err})
   172  		}
   173  	}()
   174  
   175  	// Ignore deleted MachineSets; this can happen when foregroundDeletion
   176  	// is enabled.
   177  	if !machineSet.DeletionTimestamp.IsZero() {
   178  		return ctrl.Result{}, nil
   179  	}
   180  
   181  	result, err := r.reconcile(ctx, cluster, machineSet)
   182  	if err != nil {
   183  		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
   184  		// the current cluster due to concurrent access.
   185  		if errors.Is(err, remote.ErrClusterLocked) {
   186  			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   187  			return ctrl.Result{Requeue: true}, nil
   188  		}
   189  		r.recorder.Eventf(machineSet, corev1.EventTypeWarning, "ReconcileError", "%v", err)
   190  	}
   191  	return result, err
   192  }
   193  
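        // patchMachineSet patches the MachineSet and its status, recomputing the Ready summary condition and
        // taking ownership of the conditions managed by this controller so conflicts on them are ignored.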
   194  func patchMachineSet(ctx context.Context, patchHelper *patch.Helper, machineSet *clusterv1.MachineSet, options ...patch.Option) error {
   195  	// Always update the readyCondition by summarizing the state of other conditions.
   196  	conditions.SetSummary(machineSet,
   197  		conditions.WithConditions(
   198  			clusterv1.MachinesCreatedCondition,
   199  			clusterv1.ResizedCondition,
   200  			clusterv1.MachinesReadyCondition,
   201  		),
   202  	)
   203  
   204  	// Patch the object, ignoring conflicts on the conditions owned by this controller.
   205  	options = append(options,
   206  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   207  			clusterv1.ReadyCondition,
   208  			clusterv1.MachinesCreatedCondition,
   209  			clusterv1.ResizedCondition,
   210  			clusterv1.MachinesReadyCondition,
   211  		}},
   212  	)
   213  	return patchHelper.Patch(ctx, machineSet, options...)
   214  }
   215  
   216  func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, machineSet *clusterv1.MachineSet) (ctrl.Result, error) {
   217  	log := ctrl.LoggerFrom(ctx)
   218  
   219  	// Ensure the MachineSet is labeled with the name of the Cluster it belongs to.
   220  	if machineSet.Labels == nil {
   221  		machineSet.Labels = make(map[string]string)
   222  	}
   223  	machineSet.Labels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   224  
   225  	// If the MachineSet is a stand-alone one, i.e. not originated from a MachineDeployment, then set it as directly
   226  	// owned by the Cluster (if not already present).
   227  	if r.shouldAdopt(machineSet) {
   228  		machineSet.SetOwnerReferences(util.EnsureOwnerRef(machineSet.GetOwnerReferences(), metav1.OwnerReference{
   229  			APIVersion: clusterv1.GroupVersion.String(),
   230  			Kind:       "Cluster",
   231  			Name:       cluster.Name,
   232  			UID:        cluster.UID,
   233  		}))
   234  	}
   235  
   236  	// Make sure to reconcile the external infrastructure reference.
   237  	if err := reconcileExternalTemplateReference(ctx, r.UnstructuredCachingClient, cluster, &machineSet.Spec.Template.Spec.InfrastructureRef); err != nil {
   238  		return ctrl.Result{}, err
   239  	}
   240  	// Make sure to reconcile the external bootstrap reference, if any.
   241  	if machineSet.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
   242  		if err := reconcileExternalTemplateReference(ctx, r.UnstructuredCachingClient, cluster, machineSet.Spec.Template.Spec.Bootstrap.ConfigRef); err != nil {
   243  			return ctrl.Result{}, err
   244  		}
   245  	}
   246  
   247  	// Make sure the selector and the template are scoped to the same cluster.
   248  	if machineSet.Spec.Selector.MatchLabels == nil {
   249  		machineSet.Spec.Selector.MatchLabels = make(map[string]string)
   250  	}
   251  
   252  	if machineSet.Spec.Template.Labels == nil {
   253  		machineSet.Spec.Template.Labels = make(map[string]string)
   254  	}
   255  
   256  	machineSet.Spec.Selector.MatchLabels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   257  	machineSet.Spec.Template.Labels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   258  
   259  	selectorMap, err := metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
   260  	if err != nil {
   261  		return ctrl.Result{}, errors.Wrapf(err, "failed to convert MachineSet %q label selector to a map", machineSet.Name)
   262  	}
   263  
   264  	// Get all Machines linked to this MachineSet.
   265  	allMachines := &clusterv1.MachineList{}
   266  	err = r.Client.List(ctx,
   267  		allMachines,
   268  		client.InNamespace(machineSet.Namespace),
   269  		client.MatchingLabels(selectorMap),
   270  	)
   271  	if err != nil {
   272  		return ctrl.Result{}, errors.Wrap(err, "failed to list machines")
   273  	}
   274  
   275  	// Filter out irrelevant machines (i.e. IsControlledBy something else) and claim orphaned machines.
   276  	// Machines in deleting state are deliberately not excluded, see https://github.com/kubernetes-sigs/cluster-api/pull/3434.
   277  	filteredMachines := make([]*clusterv1.Machine, 0, len(allMachines.Items))
   278  	for idx := range allMachines.Items {
   279  		machine := &allMachines.Items[idx]
   280  		log := log.WithValues("Machine", klog.KObj(machine))
   281  		if shouldExcludeMachine(machineSet, machine) {
   282  			continue
   283  		}
   284  
   285  		// Attempt to adopt machine if it meets previous conditions and it has no controller references.
   286  		if metav1.GetControllerOf(machine) == nil {
   287  			if err := r.adoptOrphan(ctx, machineSet, machine); err != nil {
   288  				log.Error(err, "Failed to adopt Machine")
   289  				r.recorder.Eventf(machineSet, corev1.EventTypeWarning, "FailedAdopt", "Failed to adopt Machine %q: %v", machine.Name, err)
   290  				continue
   291  			}
   292  			log.Info("Adopted Machine")
   293  			r.recorder.Eventf(machineSet, corev1.EventTypeNormal, "SuccessfulAdopt", "Adopted Machine %q", machine.Name)
   294  		}
   295  
   296  		filteredMachines = append(filteredMachines, machine)
   297  	}
   298  
   299  	result := ctrl.Result{}
   300  
   301  	reconcileUnhealthyMachinesResult, err := r.reconcileUnhealthyMachines(ctx, cluster, machineSet, filteredMachines)
   302  	if err != nil {
   303  		return ctrl.Result{}, errors.Wrap(err, "failed to reconcile unhealthy machines")
   304  	}
   305  	result = util.LowestNonZeroResult(result, reconcileUnhealthyMachinesResult)
   306  
   307  	if err := r.syncMachines(ctx, machineSet, filteredMachines); err != nil {
   308  		return ctrl.Result{}, errors.Wrap(err, "failed to update Machines")
   309  	}
   310  
   311  	syncReplicasResult, syncErr := r.syncReplicas(ctx, cluster, machineSet, filteredMachines)
   312  	result = util.LowestNonZeroResult(result, syncReplicasResult)
   313  
   314  	// Always update the status as machines come up or die.
   315  	if err := r.updateStatus(ctx, cluster, machineSet, filteredMachines); err != nil {
   316  		return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate([]error{err, syncErr}), "failed to update MachineSet's Status")
   317  	}
   318  
   319  	if syncErr != nil {
   320  		return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync MachineSet replicas")
   321  	}
   322  
   323  	var replicas int32
   324  	if machineSet.Spec.Replicas != nil {
   325  		replicas = *machineSet.Spec.Replicas
   326  	}
   327  
   328  	// Resync the MachineSet after MinReadySeconds as a last line of defense to guard against clock-skew.
   329  	// Clock-skew is an issue as it may impact whether an available replica is counted as a ready replica.
   330  	// A replica is available if the amount of time since last transition exceeds MinReadySeconds.
   331  	// If there is clock skew, checking whether the amount of time since the last transition to the ready state
   332  	// exceeds MinReadySeconds could be incorrect.
   333  	// To avoid a ready replica being stuck without ever being counted as available, we force a reconcile after
   334  	// MinReadySeconds, at which point any ready replica should also be confirmed as available.
   335  	if machineSet.Spec.MinReadySeconds > 0 &&
   336  		machineSet.Status.ReadyReplicas == replicas &&
   337  		machineSet.Status.AvailableReplicas != replicas {
   338  		minReadyResult := ctrl.Result{RequeueAfter: time.Duration(machineSet.Spec.MinReadySeconds) * time.Second}
   339  		result = util.LowestNonZeroResult(result, minReadyResult)
   340  		return result, nil
   341  	}
   342  
   343  	// Quickly reconcile until the nodes become Ready.
   344  	if machineSet.Status.ReadyReplicas != replicas {
   345  		result = util.LowestNonZeroResult(result, ctrl.Result{RequeueAfter: 15 * time.Second})
   346  		return result, nil
   347  	}
   348  
   349  	return result, nil
   350  }
   351  
   352  // syncMachines updates Machines, InfrastructureMachine and BootstrapConfig to propagate in-place mutable fields
   353  // from the MachineSet.
   354  // Note: It also cleans up managed fields of all Machines so that Machines that were
   355  // created/patched before the controller adopted Server-Side-Apply (SSA) (i.e. before v1.4.0) can also work with SSA.
   356  // Note: For InfrastructureMachines and BootstrapConfigs it also drops ownership of "metadata.labels" and
   357  // "metadata.annotations" from "manager" so that "capi-machineset" can own these fields and can work with SSA.
   358  // Otherwise fields would be co-owned by our "old" "manager" and "capi-machineset" and then we would not be
   359  // able to e.g. drop labels and annotations.
   360  func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
   361  	log := ctrl.LoggerFrom(ctx)
   362  	for i := range machines {
   363  		m := machines[i]
   364  		// If the machine is already being deleted, we don't need to update it.
   365  		if !m.DeletionTimestamp.IsZero() {
   366  			continue
   367  		}
   368  
   369  		// Cleanup managed fields of all Machines.
   370  		// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
   371  		// (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
   372  		// "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   373  		if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, machineSetManagerName); err != nil {
   374  			return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the Machine %q", m.Name)
   375  		}
   376  
   377  		// Update Machine to propagate in-place mutable fields from the MachineSet.
   378  		updatedMachine := r.computeDesiredMachine(machineSet, m)
   379  		err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: m})
   380  		if err != nil {
   381  			log.Error(err, "failed to update Machine", "Machine", klog.KObj(updatedMachine))
   382  			return errors.Wrapf(err, "failed to update Machine %q", klog.KObj(updatedMachine))
   383  		}
   384  		machines[i] = updatedMachine
   385  
   386  		infraMachine, err := external.Get(ctx, r.UnstructuredCachingClient, &updatedMachine.Spec.InfrastructureRef, updatedMachine.Namespace)
   387  		if err != nil {
   388  			return errors.Wrapf(err, "failed to get InfrastructureMachine %s",
   389  				klog.KRef(updatedMachine.Spec.InfrastructureRef.Namespace, updatedMachine.Spec.InfrastructureRef.Name))
   390  		}
   391  		// Cleanup managed fields of all InfrastructureMachines to drop ownership of labels and annotations
   392  		// from "manager". We do this so that InfrastructureMachines that are created using the Create method
   393  		// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   394  		// and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   395  		labelsAndAnnotationsManagedFieldPaths := []contract.Path{
   396  			{"f:metadata", "f:annotations"},
   397  			{"f:metadata", "f:labels"},
   398  		}
   399  		if err := ssa.DropManagedFields(ctx, r.Client, infraMachine, machineSetManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   400  			return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the InfrastructureMachine %s", klog.KObj(infraMachine))
   401  		}
   402  		// Update in-place mutating fields on InfrastructureMachine.
   403  		if err := r.updateExternalObject(ctx, infraMachine, machineSet); err != nil {
   404  			return errors.Wrapf(err, "failed to update InfrastructureMachine %s", klog.KObj(infraMachine))
   405  		}
   406  
   407  		if updatedMachine.Spec.Bootstrap.ConfigRef != nil {
   408  			bootstrapConfig, err := external.Get(ctx, r.UnstructuredCachingClient, updatedMachine.Spec.Bootstrap.ConfigRef, updatedMachine.Namespace)
   409  			if err != nil {
   410  				return errors.Wrapf(err, "failed to get BootstrapConfig %s",
   411  					klog.KRef(updatedMachine.Spec.Bootstrap.ConfigRef.Namespace, updatedMachine.Spec.Bootstrap.ConfigRef.Name))
   412  			}
   413  			// Cleanup managed fields of all BootstrapConfigs to drop ownership of labels and annotations
   414  			// from "manager". We do this so that BootstrapConfigs that are created using the Create method
   415  			// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   416  			// and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   417  			if err := ssa.DropManagedFields(ctx, r.Client, bootstrapConfig, machineSetManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   418  				return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the BootstrapConfig %s", klog.KObj(bootstrapConfig))
   419  			}
   420  			// Update in-place mutating fields on BootstrapConfig.
   421  			if err := r.updateExternalObject(ctx, bootstrapConfig, machineSet); err != nil {
   422  				return errors.Wrapf(err, "failed to update BootstrapConfig %s", klog.KObj(bootstrapConfig))
   423  			}
   424  		}
   425  	}
   426  	return nil
   427  }
   428  
   429  // syncReplicas scales Machine resources up or down.
   430  func (r *Reconciler) syncReplicas(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, machines []*clusterv1.Machine) (ctrl.Result, error) {
   431  	log := ctrl.LoggerFrom(ctx)
   432  	if ms.Spec.Replicas == nil {
   433  		return ctrl.Result{}, errors.Errorf("the Replicas field in Spec for machineset %v is nil, this should not be allowed", ms.Name)
   434  	}
   435  	diff := len(machines) - int(*(ms.Spec.Replicas))
   436  	switch {
   437  	case diff < 0:
   438  		diff *= -1
   439  		log.Info(fmt.Sprintf("MachineSet is scaling up to %d replicas by creating %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines))
   440  		if ms.Annotations != nil {
   441  			if _, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok {
   442  				log.Info("Automatic creation of new machines disabled for machine set")
   443  				return ctrl.Result{}, nil
   444  			}
   445  		}
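        		// Note: the DisableMachineCreateAnnotation only pauses the creation of new Machines;
        		// scale down (the diff > 0 case below) still proceeds.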
   446  
   447  		result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
   448  		if err != nil || !result.IsZero() {
   449  			if err != nil {
   450  				// If the error is not nil, use that as the message for the condition.
   451  				preflightCheckErrMessage = err.Error()
   452  			}
   453  			conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
   454  			return result, err
   455  		}
   456  
   457  		var (
   458  			machineList []*clusterv1.Machine
   459  			errs        []error
   460  		)
   461  
   462  		for i := 0; i < diff; i++ {
   463  			// Shadow the outer logger so that per-machine key/value pairs do not accumulate across iterations.
   464  			log := log
   465  			machine := r.computeDesiredMachine(ms, nil)
   466  			// Clone and set the infrastructure and bootstrap references.
   467  			var (
   468  				infraRef, bootstrapRef *corev1.ObjectReference
   469  				err                    error
   470  			)
   471  
   472  			// Create the BootstrapConfig if necessary.
   473  			if ms.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
   474  				bootstrapRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
   475  					Client:      r.UnstructuredCachingClient,
   476  					TemplateRef: ms.Spec.Template.Spec.Bootstrap.ConfigRef,
   477  					Namespace:   machine.Namespace,
   478  					ClusterName: machine.Spec.ClusterName,
   479  					Labels:      machine.Labels,
   480  					Annotations: machine.Annotations,
   481  					OwnerRef: &metav1.OwnerReference{
   482  						APIVersion: clusterv1.GroupVersion.String(),
   483  						Kind:       "MachineSet",
   484  						Name:       ms.Name,
   485  						UID:        ms.UID,
   486  					},
   487  				})
   488  				if err != nil {
   489  					conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.BootstrapTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
   490  					return ctrl.Result{}, errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine",
   491  						ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind,
   492  						klog.KRef(ms.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name))
   493  				}
   494  				machine.Spec.Bootstrap.ConfigRef = bootstrapRef
   495  				log = log.WithValues(bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
   496  			}
   497  
   498  			// Create the InfraMachine.
   499  			infraRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
   500  				Client:      r.UnstructuredCachingClient,
   501  				TemplateRef: &ms.Spec.Template.Spec.InfrastructureRef,
   502  				Namespace:   machine.Namespace,
   503  				ClusterName: machine.Spec.ClusterName,
   504  				Labels:      machine.Labels,
   505  				Annotations: machine.Annotations,
   506  				OwnerRef: &metav1.OwnerReference{
   507  					APIVersion: clusterv1.GroupVersion.String(),
   508  					Kind:       "MachineSet",
   509  					Name:       ms.Name,
   510  					UID:        ms.UID,
   511  				},
   512  			})
   513  			if err != nil {
   514  				conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.InfrastructureTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
   515  				return ctrl.Result{}, errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine",
   516  					ms.Spec.Template.Spec.InfrastructureRef.Kind,
   517  					klog.KRef(ms.Spec.Template.Spec.InfrastructureRef.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name))
   518  			}
   519  			log = log.WithValues(infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
   520  			machine.Spec.InfrastructureRef = *infraRef
   521  
   522  			// Create the Machine.
   523  			if err := ssa.Patch(ctx, r.Client, machineSetManagerName, machine); err != nil {
   524  				log.Error(err, "Error while creating a machine")
   525  				r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err)
   526  				errs = append(errs, err)
   527  				conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.MachineCreationFailedReason,
   528  					clusterv1.ConditionSeverityError, err.Error())
   529  
   530  				// Try to cleanup the external objects if the Machine creation failed.
   531  				if err := r.Client.Delete(ctx, util.ObjectReferenceToUnstructured(*infraRef)); err != nil && !apierrors.IsNotFound(err) {
   532  					log.Error(err, "Failed to cleanup infrastructure machine object after Machine creation error", infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
   533  				}
   534  				if bootstrapRef != nil {
   535  					if err := r.Client.Delete(ctx, util.ObjectReferenceToUnstructured(*bootstrapRef)); err != nil && !apierrors.IsNotFound(err) {
   536  						log.Error(err, "Failed to cleanup bootstrap configuration object after Machine creation error", bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
   537  					}
   538  				}
   539  				continue
   540  			}
   541  
   542  			log.Info(fmt.Sprintf("Created machine %d of %d", i+1, diff), "Machine", klog.KObj(machine))
   543  			r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulCreate", "Created machine %q", machine.Name)
   544  			machineList = append(machineList, machine)
   545  		}
   546  
   547  		if len(errs) > 0 {
   548  			return ctrl.Result{}, kerrors.NewAggregate(errs)
   549  		}
   550  		return ctrl.Result{}, r.waitForMachineCreation(ctx, machineList)
   551  	case diff > 0:
   552  		log.Info(fmt.Sprintf("MachineSet is scaling down to %d replicas by deleting %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines), "deletePolicy", ms.Spec.DeletePolicy)
   553  
   554  		deletePriorityFunc, err := getDeletePriorityFunc(ms)
   555  		if err != nil {
   556  			return ctrl.Result{}, err
   557  		}
   558  
   559  		var errs []error
   560  		machinesToDelete := getMachinesToDeletePrioritized(machines, diff, deletePriorityFunc)
   561  		for i, machine := range machinesToDelete {
   562  			log := log.WithValues("Machine", klog.KObj(machine))
   563  			if machine.GetDeletionTimestamp().IsZero() {
   564  				log.Info(fmt.Sprintf("Deleting machine %d of %d", i+1, diff))
   565  				if err := r.Client.Delete(ctx, machine); err != nil {
   566  					log.Error(err, "Unable to delete Machine")
   567  					r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedDelete", "Failed to delete machine %q: %v", machine.Name, err)
   568  					errs = append(errs, err)
   569  					continue
   570  				}
   571  				r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted machine %q", machine.Name)
   572  			} else {
   573  				log.Info(fmt.Sprintf("Waiting for machine %d of %d to be deleted", i+1, diff))
   574  			}
   575  		}
   576  
   577  		if len(errs) > 0 {
   578  			return ctrl.Result{}, kerrors.NewAggregate(errs)
   579  		}
   580  		return ctrl.Result{}, r.waitForMachineDeletion(ctx, machinesToDelete)
   581  	}
   582  
   583  	return ctrl.Result{}, nil
   584  }
   585  
   586  // computeDesiredMachine computes the desired Machine.
   587  // This Machine will be used during reconciliation to:
   588  // * create a Machine
   589  // * update an existing Machine
   590  // Because we are using Server-Side-Apply we always have to calculate the full object.
   591  // There are small differences in how we calculate the Machine depending on whether it
   592  // is a create or an update. Example: for a new Machine we have to generate a new name,
   593  // while for an existing Machine we have to reuse the name of the existing Machine.
   594  func (r *Reconciler) computeDesiredMachine(machineSet *clusterv1.MachineSet, existingMachine *clusterv1.Machine) *clusterv1.Machine {
   595  	desiredMachine := &clusterv1.Machine{
   596  		TypeMeta: metav1.TypeMeta{
   597  			APIVersion: clusterv1.GroupVersion.String(),
   598  			Kind:       "Machine",
   599  		},
   600  		ObjectMeta: metav1.ObjectMeta{
   601  			Name:      names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", machineSet.Name)),
   602  			Namespace: machineSet.Namespace,
   603  			// Note: By setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
   604  			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
   605  			Labels:          map[string]string{},
   606  			Annotations:     map[string]string{},
   607  			Finalizers:      []string{clusterv1.MachineFinalizer},
   608  		},
   609  		Spec: *machineSet.Spec.Template.Spec.DeepCopy(),
   610  	}
   611  	// Set ClusterName.
   612  	desiredMachine.Spec.ClusterName = machineSet.Spec.ClusterName
   613  
   614  	// Clean up the refs to the incorrect objects.
   615  	// The InfrastructureRef and the Bootstrap.ConfigRef in a Machine should point to the InfrastructureMachine
   616  	// and the BootstrapConfig objects. In the MachineSet these values point to the InfrastructureMachineTemplate
   617  	// and the BootstrapConfigTemplate. Drop the values that were copied over from the MachineSet during DeepCopy
   618  	// so that the Machine does not point to the incorrect (template) refs.
   619  	// Note: During Machine creation, these refs will be updated with the correct values after the corresponding
   620  	// objects are created.
   621  	desiredMachine.Spec.InfrastructureRef = corev1.ObjectReference{}
   622  	desiredMachine.Spec.Bootstrap.ConfigRef = nil
   623  
   624  	// If we are updating an existing Machine reuse the name, uid, infrastructureRef and bootstrap.configRef
   625  	// from the existingMachine.
   626  	// Note: we use UID to force SSA to update the existing Machine and to not accidentally create a new Machine.
   627  	// infrastructureRef and bootstrap.configRef remain the same for an existing Machine.
   628  	if existingMachine != nil {
   629  		desiredMachine.SetName(existingMachine.Name)
   630  		desiredMachine.SetUID(existingMachine.UID)
   631  		desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
   632  		desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
   633  	}
   634  
   635  	// Set the in-place mutable fields.
   636  	// When we create a new Machine we will just create the Machine with those fields.
   637  	// When we update an existing Machine we will update the fields on the existing Machine (in-place mutate).
   638  
   639  	// Set Labels
   640  	desiredMachine.Labels = machineLabelsFromMachineSet(machineSet)
   641  
   642  	// Set Annotations
   643  	desiredMachine.Annotations = machineAnnotationsFromMachineSet(machineSet)
   644  
   645  	// Set all other in-place mutable fields.
   646  	desiredMachine.Spec.NodeDrainTimeout = machineSet.Spec.Template.Spec.NodeDrainTimeout
   647  	desiredMachine.Spec.NodeDeletionTimeout = machineSet.Spec.Template.Spec.NodeDeletionTimeout
   648  	desiredMachine.Spec.NodeVolumeDetachTimeout = machineSet.Spec.Template.Spec.NodeVolumeDetachTimeout
   649  
   650  	return desiredMachine
   651  }
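        // Illustrative usage, mirroring the two call sites in this file (sketch, not additional API):
        //
        //	newMachine := r.computeDesiredMachine(ms, nil)          // create: fresh generated name, empty refs
        //	updatedMachine := r.computeDesiredMachine(ms, existing) // update: reuses name, UID and object refs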
   652  
   653  // updateExternalObject updates the external object passed in with the
   654  // updated labels and annotations from the MachineSet.
   655  func (r *Reconciler) updateExternalObject(ctx context.Context, obj client.Object, machineSet *clusterv1.MachineSet) error {
   656  	updatedObject := &unstructured.Unstructured{}
   657  	updatedObject.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())
   658  	updatedObject.SetNamespace(obj.GetNamespace())
   659  	updatedObject.SetName(obj.GetName())
   660  	// Set the UID to ensure that Server-Side-Apply only performs an update
   661  	// and does not perform an accidental create.
   662  	updatedObject.SetUID(obj.GetUID())
   663  
   664  	updatedObject.SetLabels(machineLabelsFromMachineSet(machineSet))
   665  	updatedObject.SetAnnotations(machineAnnotationsFromMachineSet(machineSet))
   666  
   667  	if err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedObject, ssa.WithCachingProxy{Cache: r.ssaCache, Original: obj}); err != nil {
   668  		return errors.Wrapf(err, "failed to update %s", klog.KObj(obj))
   669  	}
   670  	return nil
   671  }
   672  
   673  // machineLabelsFromMachineSet computes the labels the Machine created from this MachineSet should have.
   674  func machineLabelsFromMachineSet(machineSet *clusterv1.MachineSet) map[string]string {
   675  	machineLabels := map[string]string{}
   676  	// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
   677  	// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
   678  	// MachineSetNameLabel and MachineDeploymentNameLabel later on the Machine would also add the labels
   679  	// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
   680  	for k, v := range machineSet.Spec.Template.Labels {
   681  		machineLabels[k] = v
   682  	}
   683  	// Always set the MachineSetNameLabel.
   684  	// Note: If a client tries to create a MachineSet without a selector, the MachineSet webhook
   685  	// will add this label automatically. But we want this label to always be present even if the MachineSet
   686  	// has a selector which doesn't include it. Therefore, we have to set it here explicitly.
   687  	machineLabels[clusterv1.MachineSetNameLabel] = format.MustFormatValue(machineSet.Name)
   688  	// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it exists.
   689  	if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
   690  		machineLabels[clusterv1.MachineDeploymentNameLabel] = mdName
   691  	}
   692  	return machineLabels
   693  }
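        // A sketch of the aliasing problem the note above avoids (hypothetical code, for illustration only):
        //
        //	labels := machineSet.Spec.Template.Labels      // shares the underlying map
        //	labels[clusterv1.MachineSetNameLabel] = "ms-1"  // would also mutate machineSet.Spec.Template.Labels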
   694  
   695  // machineAnnotationsFromMachineSet computes the annotations the Machine created from this MachineSet should have.
   696  func machineAnnotationsFromMachineSet(machineSet *clusterv1.MachineSet) map[string]string {
   697  	annotations := map[string]string{}
   698  	for k, v := range machineSet.Spec.Template.Annotations {
   699  		annotations[k] = v
   700  	}
   701  	return annotations
   702  }
   703  
   704  // shouldExcludeMachine returns true if the machine should be filtered out, false otherwise.
   705  func shouldExcludeMachine(machineSet *clusterv1.MachineSet, machine *clusterv1.Machine) bool {
   706  	if metav1.GetControllerOf(machine) != nil && !metav1.IsControlledBy(machine, machineSet) {
   707  		return true
   708  	}
   709  
   710  	return false
   711  }
   712  
   713  // adoptOrphan sets the MachineSet as a controller OwnerReference to the Machine.
   714  func (r *Reconciler) adoptOrphan(ctx context.Context, machineSet *clusterv1.MachineSet, machine *clusterv1.Machine) error {
   715  	patch := client.MergeFrom(machine.DeepCopy())
   716  	newRef := *metav1.NewControllerRef(machineSet, machineSetKind)
   717  	machine.SetOwnerReferences(util.EnsureOwnerRef(machine.GetOwnerReferences(), newRef))
   718  	return r.Client.Patch(ctx, machine, patch)
   719  }
   720  
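        // waitForMachineCreation polls until all newly created Machines are observed by the (cached) client,
        // or stateConfirmationTimeout expires.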
   721  func (r *Reconciler) waitForMachineCreation(ctx context.Context, machineList []*clusterv1.Machine) error {
   722  	log := ctrl.LoggerFrom(ctx)
   723  
   724  	for i := 0; i < len(machineList); i++ {
   725  		machine := machineList[i]
   726  		pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) {
   727  			key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name}
   728  			if err := r.Client.Get(ctx, key, &clusterv1.Machine{}); err != nil {
   729  				if apierrors.IsNotFound(err) {
   730  					return false, nil
   731  				}
   732  				return false, err
   733  			}
   734  
   735  			return true, nil
   736  		})
   737  
   738  		if pollErr != nil {
   739  			log.Error(pollErr, "Failed waiting for machine object to be created")
   740  			return errors.Wrap(pollErr, "failed waiting for machine object to be created")
   741  		}
   742  	}
   743  
   744  	return nil
   745  }
   746  
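        // waitForMachineDeletion polls until every Machine in machineList is either gone or marked for
        // deletion, or stateConfirmationTimeout expires.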
   747  func (r *Reconciler) waitForMachineDeletion(ctx context.Context, machineList []*clusterv1.Machine) error {
   748  	log := ctrl.LoggerFrom(ctx)
   749  
   750  	for i := 0; i < len(machineList); i++ {
   751  		machine := machineList[i]
   752  		pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) {
   753  			m := &clusterv1.Machine{}
   754  			key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name}
   755  			err := r.Client.Get(ctx, key, m)
   756  			if apierrors.IsNotFound(err) || !m.DeletionTimestamp.IsZero() {
   757  				return true, nil
   758  			}
   759  			return false, err
   760  		})
   761  
   762  		if pollErr != nil {
   763  			log.Error(pollErr, "Failed waiting for machine object to be deleted")
   764  			return errors.Wrap(pollErr, "failed waiting for machine object to be deleted")
   765  		}
   766  	}
   767  	return nil
   768  }
   769  
   770  // MachineToMachineSets is a handler.MapFunc to be used to enqueue requests for reconciliation
   771  // for MachineSets that might adopt an orphaned Machine.
   772  func (r *Reconciler) MachineToMachineSets(ctx context.Context, o client.Object) []ctrl.Request {
   773  	result := []ctrl.Request{}
   774  
   775  	m, ok := o.(*clusterv1.Machine)
   776  	if !ok {
   777  		panic(fmt.Sprintf("Expected a Machine but got a %T", o))
   778  	}
   779  
   780  	log := ctrl.LoggerFrom(ctx, "Machine", klog.KObj(m))
   781  
   782  	// Check if the controller reference is already set and
   783  	// return an empty result when one is found.
   784  	for _, ref := range m.ObjectMeta.GetOwnerReferences() {
   785  		if ref.Controller != nil && *ref.Controller {
   786  			return result
   787  		}
   788  	}
   789  
   790  	mss, err := r.getMachineSetsForMachine(ctx, m)
   791  	if err != nil {
   792  		log.Error(err, "Failed getting MachineSets for Machine")
   793  		return nil
   794  	}
   795  	if len(mss) == 0 {
   796  		return nil
   797  	}
   798  
   799  	for _, ms := range mss {
   800  		name := client.ObjectKey{Namespace: ms.Namespace, Name: ms.Name}
   801  		result = append(result, ctrl.Request{NamespacedName: name})
   802  	}
   803  
   804  	return result
   805  }
   806  
   807  func (r *Reconciler) getMachineSetsForMachine(ctx context.Context, m *clusterv1.Machine) ([]*clusterv1.MachineSet, error) {
   808  	if len(m.Labels) == 0 {
   809  		return nil, fmt.Errorf("machine %v has no labels, this is unexpected", client.ObjectKeyFromObject(m))
   810  	}
   811  
   812  	msList := &clusterv1.MachineSetList{}
   813  	if err := r.Client.List(ctx, msList, client.InNamespace(m.Namespace)); err != nil {
   814  		return nil, errors.Wrapf(err, "failed to list MachineSets")
   815  	}
   816  
   817  	var mss []*clusterv1.MachineSet
   818  	for idx := range msList.Items {
   819  		ms := &msList.Items[idx]
   820  		if machine.HasMatchingLabels(ms.Spec.Selector, m.Labels) {
   821  			mss = append(mss, ms)
   822  		}
   823  	}
   824  
   825  	return mss, nil
   826  }
   827  
   828  // shouldAdopt returns true if the MachineSet should be adopted as a stand-alone MachineSet directly owned by the Cluster.
   829  func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
   830  	// If the MachineSet is controlled by a MachineDeployment, or if it is a stand-alone MachineSet directly owned by the Cluster, then no-op.
   831  	if util.HasOwner(ms.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"}) {
   832  		return false
   833  	}
   834  
   835  	// If the MachineSet originates from a MachineDeployment object, it should not be adopted directly by the Cluster as a stand-alone MachineSet.
   836  	// Note: this is required because after restore from a backup both the MachineSet controller and the
   837  	// MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
   838  	if _, ok := ms.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
   839  		return false
   840  	}
   841  	return true
   842  }
   843  
   844  // updateStatus updates the Status field for the MachineSet.
   845  // It checks the current state of the replicas and updates the Status of the MachineSet accordingly.
   846  func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) error {
   847  	log := ctrl.LoggerFrom(ctx)
   848  	newStatus := ms.Status.DeepCopy()
   849  
   850  	// Copy label selector to its status counterpart in string format.
   851  	// This is necessary for CRDs including scale subresources.
   852  	selector, err := metav1.LabelSelectorAsSelector(&ms.Spec.Selector)
   853  	if err != nil {
   854  		return errors.Wrapf(err, "failed to update status for MachineSet %s/%s", ms.Namespace, ms.Name)
   855  	}
   856  	newStatus.Selector = selector.String()
   857  
   858  	// Count the number of machines that have labels matching the labels of the machine
   859  	// template of the MachineSet; the matching machines may have more
   860  	// labels than are in the template. Because the labels of the machine template are
   861  	// a superset of the selector of the MachineSet, the possible
   862  	// matching machines must be part of the filteredMachines.
   863  	fullyLabeledReplicasCount := 0
   864  	readyReplicasCount := 0
   865  	availableReplicasCount := 0
   866  	desiredReplicas := *ms.Spec.Replicas
   867  	templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated()
   868  
   869  	for _, machine := range filteredMachines {
   870  		log := log.WithValues("Machine", klog.KObj(machine))
   871  
   872  		if templateLabel.Matches(labels.Set(machine.Labels)) {
   873  			fullyLabeledReplicasCount++
   874  		}
   875  
   876  		if machine.Status.NodeRef == nil {
   877  			log.V(4).Info("Waiting for the machine controller to set status.NodeRef on the Machine")
   878  			continue
   879  		}
   880  
   881  		node, err := r.getMachineNode(ctx, cluster, machine)
   882  		if err != nil && machine.GetDeletionTimestamp().IsZero() {
   883  			log.Error(err, "Unable to retrieve Node status", "node", klog.KObj(node))
   884  			continue
   885  		}
   886  
   887  		if noderefutil.IsNodeReady(node) {
   888  			readyReplicasCount++
   889  			if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) {
   890  				availableReplicasCount++
   891  			}
   892  		} else if machine.GetDeletionTimestamp().IsZero() {
   893  			log.V(4).Info("Waiting for the Kubernetes node on the machine to report ready state")
   894  		}
   895  	}
   896  
   897  	newStatus.Replicas = int32(len(filteredMachines))
   898  	newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
   899  	newStatus.ReadyReplicas = int32(readyReplicasCount)
   900  	newStatus.AvailableReplicas = int32(availableReplicasCount)
   901  
   902  	// Copy the newly calculated status into the machineset
   903  	if ms.Status.Replicas != newStatus.Replicas ||
   904  		ms.Status.FullyLabeledReplicas != newStatus.FullyLabeledReplicas ||
   905  		ms.Status.ReadyReplicas != newStatus.ReadyReplicas ||
   906  		ms.Status.AvailableReplicas != newStatus.AvailableReplicas ||
   907  		ms.Generation != ms.Status.ObservedGeneration {
   908  		log.V(4).Info("Updating status: " +
   909  			fmt.Sprintf("replicas %d->%d (need %d), ", ms.Status.Replicas, newStatus.Replicas, desiredReplicas) +
   910  			fmt.Sprintf("fullyLabeledReplicas %d->%d, ", ms.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
   911  			fmt.Sprintf("readyReplicas %d->%d, ", ms.Status.ReadyReplicas, newStatus.ReadyReplicas) +
   912  			fmt.Sprintf("availableReplicas %d->%d, ", ms.Status.AvailableReplicas, newStatus.AvailableReplicas) +
   913  			fmt.Sprintf("observedGeneration %v->%v", ms.Status.ObservedGeneration, ms.Generation))
   914  
   915  		// Save the generation number we acted on, otherwise we might wrongfully indicate
   916  		// that we've seen a spec update when we retry.
   917  		newStatus.ObservedGeneration = ms.Generation
   918  		newStatus.DeepCopyInto(&ms.Status)
   919  	}
   920  	switch {
   921  	// We are scaling up
   922  	case newStatus.Replicas < desiredReplicas:
   923  		conditions.MarkFalse(ms, clusterv1.ResizedCondition, clusterv1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up MachineSet to %d replicas (actual %d)", desiredReplicas, newStatus.Replicas)
   924  	// We are scaling down
   925  	case newStatus.Replicas > desiredReplicas:
   926  		conditions.MarkFalse(ms, clusterv1.ResizedCondition, clusterv1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down MachineSet to %d replicas (actual %d)", desiredReplicas, newStatus.Replicas)
   927  		// This means that there was no error in generating the desired number of machine objects
   928  		conditions.MarkTrue(ms, clusterv1.MachinesCreatedCondition)
   929  	default:
   930  		// Make sure last resize operation is marked as completed.
   931  		// NOTE: we are checking the number of machines ready so we report resize completed only when the machines
   932  		// are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
   933  		if newStatus.ReadyReplicas == newStatus.Replicas {
   934  			if conditions.IsFalse(ms, clusterv1.ResizedCondition) {
   935  				log.Info("All the replicas are ready", "replicas", newStatus.ReadyReplicas)
   936  			}
   937  			conditions.MarkTrue(ms, clusterv1.ResizedCondition)
   938  		}
   939  		// This means that there was no error in generating the desired number of machine objects
   940  		conditions.MarkTrue(ms, clusterv1.MachinesCreatedCondition)
   941  	}
   942  
   943  	// Aggregate the operational state of all the machines; while aggregating we are adding the
   944  	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
   945  	conditions.SetAggregate(ms, clusterv1.MachinesReadyCondition, collections.FromMachines(filteredMachines...).ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))
   946  
   947  	return nil
   948  }
   949  
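        // getMachineNode returns the Node referenced by the Machine's status.nodeRef, read from the workload
        // cluster via the ClusterCacheTracker.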
   950  func (r *Reconciler) getMachineNode(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (*corev1.Node, error) {
   951  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   952  	if err != nil {
   953  		return nil, err
   954  	}
   955  	node := &corev1.Node{}
   956  	if err := remoteClient.Get(ctx, client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
   957  		return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
   958  	}
   959  	return node, nil
   960  }
   961  
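        // reconcileUnhealthyMachines remediates Machines whose MachineOwnerRemediated condition is false
        // (set by the MachineHealthCheck controller) by deleting them once the preflight checks pass; if the
        // checks fail, the condition is instead updated with the WaitingForRemediationReason.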
   962  func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) (ctrl.Result, error) {
   963  	log := ctrl.LoggerFrom(ctx)
   964  	// List all unhealthy machines.
   965  	machinesToRemediate := make([]*clusterv1.Machine, 0, len(filteredMachines))
   966  	for _, m := range filteredMachines {
   967  		// filteredMachines contains machines in deleting state so that the status can be calculated correctly.
   968  		// Skip remediation for machines that are already being deleted.
   969  		if !m.DeletionTimestamp.IsZero() {
   970  			continue
   971  		}
   972  		if conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedCondition) {
   973  			machinesToRemediate = append(machinesToRemediate, m)
   974  		}
   975  	}
   976  
   977  	// If there are no machines to remediate return early.
   978  	if len(machinesToRemediate) == 0 {
   979  		return ctrl.Result{}, nil
   980  	}
   981  
   982  	preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine Remediation")
   983  	if err != nil {
   984  		// If err is not nil, use that as the preflightCheckErrMessage.
   985  		preflightCheckErrMessage = err.Error()
   986  	}
   987  
   988  	preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
   989  	if preflightChecksFailed {
   990  		// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
   991  		// WaitingForRemediationReason reason.
   992  		var errs []error
   993  		for _, m := range machinesToRemediate {
   994  			patchHelper, err := patch.NewHelper(m, r.Client)
   995  			if err != nil {
   996  				errs = append(errs, errors.Wrapf(err, "failed to create patch helper for Machine %s", klog.KObj(m)))
   997  				continue
   998  			}
   999  			conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, preflightCheckErrMessage)
  1000  			if err := patchHelper.Patch(ctx, m); err != nil {
  1001  				errs = append(errs, errors.Wrapf(err, "failed to patch Machine %s", klog.KObj(m)))
  1002  			}
  1003  		}
  1004  
  1005  		if len(errs) > 0 {
  1006  			return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to patch unhealthy Machines")
  1007  		}
  1008  		return preflightChecksResult, nil
  1009  	}
  1010  
  1011  	// PreflightChecks passed, so it is safe to remediate unhealthy machines.
  1012  	// Remediate unhealthy machines by deleting them.
  1013  	var errs []error
  1014  	for _, m := range machinesToRemediate {
  1015  		log.Info(fmt.Sprintf("Deleting Machine %s because it was marked as unhealthy by the MachineHealthCheck controller", klog.KObj(m)))
  1016  		patch := client.MergeFrom(m.DeepCopy())
  1017  		if err := r.Client.Delete(ctx, m); err != nil {
  1018  			errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
  1019  			continue
  1020  		}
  1021  		conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition)
  1022  		if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) {
  1023  			errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m)))
  1024  		}
  1025  	}
  1026  
  1027  	if len(errs) > 0 {
  1028  		return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines")
  1029  	}
  1030  
  1031  	return ctrl.Result{}, nil
  1032  }
  1033  
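        // reconcileExternalTemplateReference ensures a referenced template object (a ref whose Kind has the
        // Template suffix) uses the up-to-date API contract version and carries an ownerReference to the Cluster.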
  1034  func reconcileExternalTemplateReference(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, ref *corev1.ObjectReference) error {
  1035  	if !strings.HasSuffix(ref.Kind, clusterv1.TemplateSuffix) {
  1036  		return nil
  1037  	}
  1038  
  1039  	if err := utilconversion.UpdateReferenceAPIContract(ctx, c, ref); err != nil {
  1040  		return err
  1041  	}
  1042  
  1043  	obj, err := external.Get(ctx, c, ref, cluster.Namespace)
  1044  	if err != nil {
  1045  		return err
  1046  	}
  1047  
  1048  	patchHelper, err := patch.NewHelper(obj, c)
  1049  	if err != nil {
  1050  		return err
  1051  	}
  1052  
  1053  	obj.SetOwnerReferences(util.EnsureOwnerRef(obj.GetOwnerReferences(), metav1.OwnerReference{
  1054  		APIVersion: clusterv1.GroupVersion.String(),
  1055  		Kind:       "Cluster",
  1056  		Name:       cluster.Name,
  1057  		UID:        cluster.UID,
  1058  	}))
  1059  
  1060  	return patchHelper.Patch(ctx, obj)
  1061  }