sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machineset/machineset_controller.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package machineset
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/pkg/errors"
    26  	corev1 "k8s.io/api/core/v1"
    27  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    32  	"k8s.io/apimachinery/pkg/util/wait"
    33  	"k8s.io/apiserver/pkg/storage/names"
    34  	"k8s.io/client-go/tools/record"
    35  	"k8s.io/klog/v2"
    36  	ctrl "sigs.k8s.io/controller-runtime"
    37  	"sigs.k8s.io/controller-runtime/pkg/builder"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  	"sigs.k8s.io/controller-runtime/pkg/controller"
    40  	"sigs.k8s.io/controller-runtime/pkg/handler"
    41  
    42  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    43  	"sigs.k8s.io/cluster-api/controllers/external"
    44  	"sigs.k8s.io/cluster-api/controllers/noderefutil"
    45  	"sigs.k8s.io/cluster-api/controllers/remote"
    46  	"sigs.k8s.io/cluster-api/internal/contract"
    47  	"sigs.k8s.io/cluster-api/internal/controllers/machine"
    48  	"sigs.k8s.io/cluster-api/internal/util/ssa"
    49  	"sigs.k8s.io/cluster-api/util"
    50  	"sigs.k8s.io/cluster-api/util/annotations"
    51  	"sigs.k8s.io/cluster-api/util/collections"
    52  	"sigs.k8s.io/cluster-api/util/conditions"
    53  	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
    54  	"sigs.k8s.io/cluster-api/util/labels/format"
    55  	clog "sigs.k8s.io/cluster-api/util/log"
    56  	"sigs.k8s.io/cluster-api/util/patch"
    57  	"sigs.k8s.io/cluster-api/util/predicates"
    58  )
    59  
    60  var (
    61  	// machineSetKind contains the schema.GroupVersionKind for the MachineSet type.
    62  	machineSetKind = clusterv1.GroupVersion.WithKind("MachineSet")
    63  
    64  	// stateConfirmationTimeout is the amount of time allowed to wait for desired state.
    65  	stateConfirmationTimeout = 10 * time.Second
    66  
    67  	// stateConfirmationInterval is the amount of time between polling for the desired state.
    68  	// The polling is against a local memory cache.
    69  	stateConfirmationInterval = 100 * time.Millisecond
    70  )
    71  
    72  const machineSetManagerName = "capi-machineset"
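
        // With Server-Side-Apply (SSA), every apply is attributed to a field manager; machineSetManagerName is
        // the manager this controller applies objects under. As an illustrative sketch (structure from the
        // Kubernetes managedFields API, values hypothetical), ownership is recorded on objects roughly as:
        //
        //	metadata:
        //	  managedFields:
        //	  - manager: capi-machineset
        //	    operation: Apply
        //	    fieldsV1:
        //	      f:metadata:
        //	        f:labels: {}
        //
        // Fields applied under this manager can later be removed by simply omitting them from the next apply.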
    73  
    74  // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
    75  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
    76  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
    77  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
    78  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets;machinesets/status;machinesets/finalizers,verbs=get;list;watch;create;update;patch;delete
    79  
    80  // Reconciler reconciles a MachineSet object.
    81  type Reconciler struct {
    82  	Client                    client.Client
    83  	UnstructuredCachingClient client.Client
    84  	APIReader                 client.Reader
    85  	Tracker                   *remote.ClusterCacheTracker
    86  
    87  	// WatchFilterValue is the label value used to filter events prior to reconciliation.
    88  	WatchFilterValue string
    89  
    90  	ssaCache ssa.Cache
    91  	recorder record.EventRecorder
    92  }
    93  
    94  func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    95  	clusterToMachineSets, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineSetList{}, mgr.GetScheme())
    96  	if err != nil {
    97  		return err
    98  	}
    99  
   100  	err = ctrl.NewControllerManagedBy(mgr).
   101  		For(&clusterv1.MachineSet{}).
   102  		Owns(&clusterv1.Machine{}).
   103  		Watches(
   104  			&clusterv1.Machine{},
   105  			handler.EnqueueRequestsFromMapFunc(r.MachineToMachineSets),
   106  		).
   107  		WithOptions(options).
   108  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
   109  		Watches(
   110  			&clusterv1.Cluster{},
   111  			handler.EnqueueRequestsFromMapFunc(clusterToMachineSets),
   112  			builder.WithPredicates(
   113  				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
   114  				predicates.All(ctrl.LoggerFrom(ctx),
   115  					predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
   116  					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
   117  				),
   118  			),
   119  		).Complete(r)
   120  	if err != nil {
   121  		return errors.Wrap(err, "failed setting up with a controller manager")
   122  	}
   123  
   124  	r.recorder = mgr.GetEventRecorderFor("machineset-controller")
   125  	r.ssaCache = ssa.NewCache()
   126  	return nil
   127  }
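
        // A minimal wiring sketch for this reconciler, assuming a controller-runtime manager mgr, a context
        // ctx, and a *remote.ClusterCacheTracker tracker created by the caller (the option values shown are
        // illustrative, not required):
        //
        //	r := &Reconciler{
        //		Client:                    mgr.GetClient(),
        //		UnstructuredCachingClient: mgr.GetClient(),
        //		APIReader:                 mgr.GetAPIReader(),
        //		Tracker:                   tracker,
        //	}
        //	if err := r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil {
        //		return err
        //	}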
   128  
   129  func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   130  	machineSet := &clusterv1.MachineSet{}
   131  	if err := r.Client.Get(ctx, req.NamespacedName, machineSet); err != nil {
   132  		if apierrors.IsNotFound(err) {
   133  			// Object not found, return. Created objects are automatically garbage collected.
   134  			// For additional cleanup logic use finalizers.
   135  			return ctrl.Result{}, nil
   136  		}
   137  		// Error reading the object - requeue the request.
   138  		return ctrl.Result{}, err
   139  	}
   140  
   141  	// AddOwners adds the owners of MachineSet as k/v pairs to the logger.
   142  	// Specifically, it will add MachineDeployment.
   143  	ctx, log, err := clog.AddOwners(ctx, r.Client, machineSet)
   144  	if err != nil {
   145  		return ctrl.Result{}, err
   146  	}
   147  
   148  	log = log.WithValues("Cluster", klog.KRef(machineSet.ObjectMeta.Namespace, machineSet.Spec.ClusterName))
   149  	ctx = ctrl.LoggerInto(ctx, log)
   150  
   151  	cluster, err := util.GetClusterByName(ctx, r.Client, machineSet.ObjectMeta.Namespace, machineSet.Spec.ClusterName)
   152  	if err != nil {
   153  		return ctrl.Result{}, err
   154  	}
   155  
   156  	// Return early if the object or Cluster is paused.
   157  	if annotations.IsPaused(cluster, machineSet) {
   158  		log.Info("Reconciliation is paused for this object")
   159  		return ctrl.Result{}, nil
   160  	}
   161  
   162  	// Initialize the patch helper
   163  	patchHelper, err := patch.NewHelper(machineSet, r.Client)
   164  	if err != nil {
   165  		return ctrl.Result{}, err
   166  	}
   167  
   168  	defer func() {
   169  		// Always attempt to patch the object and status after each reconciliation.
   170  		if err := patchMachineSet(ctx, patchHelper, machineSet); err != nil {
   171  			reterr = kerrors.NewAggregate([]error{reterr, err})
   172  		}
   173  	}()
   174  
   175  	// Ignore deleted MachineSets; this can happen when foregroundDeletion
   176  	// is enabled.
   177  	if !machineSet.DeletionTimestamp.IsZero() {
   178  		return ctrl.Result{}, nil
   179  	}
   180  
   181  	result, err := r.reconcile(ctx, cluster, machineSet)
   182  	if err != nil {
   183  		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
   184  		// the current cluster because of concurrent access.
   185  		if errors.Is(err, remote.ErrClusterLocked) {
   186  			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   187  			return ctrl.Result{RequeueAfter: time.Minute}, nil
   188  		}
   189  		r.recorder.Eventf(machineSet, corev1.EventTypeWarning, "ReconcileError", "%v", err)
   190  	}
   191  	return result, err
   192  }
   193  
   194  func patchMachineSet(ctx context.Context, patchHelper *patch.Helper, machineSet *clusterv1.MachineSet, options ...patch.Option) error {
   195  	// Always update the readyCondition by summarizing the state of other conditions.
   196  	conditions.SetSummary(machineSet,
   197  		conditions.WithConditions(
   198  			clusterv1.MachinesCreatedCondition,
   199  			clusterv1.ResizedCondition,
   200  			clusterv1.MachinesReadyCondition,
   201  		),
   202  	)
   203  
   204  	// Patch the object, ignoring conflicts on the conditions owned by this controller.
   205  	options = append(options,
   206  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   207  			clusterv1.ReadyCondition,
   208  			clusterv1.MachinesCreatedCondition,
   209  			clusterv1.ResizedCondition,
   210  			clusterv1.MachinesReadyCondition,
   211  		}},
   212  	)
   213  	return patchHelper.Patch(ctx, machineSet, options...)
   214  }
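
        // Note on the mechanics above: conditions.SetSummary rolls the three listed conditions up into the
        // MachineSet's Ready condition, and patch.WithOwnedConditions declares this controller authoritative
        // for those condition types, so the patch helper overwrites concurrent changes to them instead of
        // treating them as conflicts.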
   215  
   216  func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, machineSet *clusterv1.MachineSet) (ctrl.Result, error) {
   217  	log := ctrl.LoggerFrom(ctx)
   218  
   219  	// Ensure the MachineSet carries the cluster name label.
   220  	if machineSet.Labels == nil {
   221  		machineSet.Labels = make(map[string]string)
   222  	}
   223  	machineSet.Labels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   224  
   225  	// If the MachineSet is a stand-alone one, i.e. not originated from a MachineDeployment, then set it as directly
   226  	// owned by the Cluster (if not already present).
   227  	if r.shouldAdopt(machineSet) {
   228  		machineSet.SetOwnerReferences(util.EnsureOwnerRef(machineSet.GetOwnerReferences(), metav1.OwnerReference{
   229  			APIVersion: clusterv1.GroupVersion.String(),
   230  			Kind:       "Cluster",
   231  			Name:       cluster.Name,
   232  			UID:        cluster.UID,
   233  		}))
   234  	}
   235  
   236  	// Make sure to reconcile the external infrastructure reference.
   237  	if err := reconcileExternalTemplateReference(ctx, r.UnstructuredCachingClient, cluster, &machineSet.Spec.Template.Spec.InfrastructureRef); err != nil {
   238  		return ctrl.Result{}, err
   239  	}
   240  	// Make sure to reconcile the external bootstrap reference, if any.
   241  	if machineSet.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
   242  		if err := reconcileExternalTemplateReference(ctx, r.UnstructuredCachingClient, cluster, machineSet.Spec.Template.Spec.Bootstrap.ConfigRef); err != nil {
   243  			return ctrl.Result{}, err
   244  		}
   245  	}
   246  
   247  	// Make sure the selector and the template are scoped to the same cluster.
   248  	if machineSet.Spec.Selector.MatchLabels == nil {
   249  		machineSet.Spec.Selector.MatchLabels = make(map[string]string)
   250  	}
   251  
   252  	if machineSet.Spec.Template.Labels == nil {
   253  		machineSet.Spec.Template.Labels = make(map[string]string)
   254  	}
   255  
   256  	machineSet.Spec.Selector.MatchLabels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   257  	machineSet.Spec.Template.Labels[clusterv1.ClusterNameLabel] = machineSet.Spec.ClusterName
   258  
   259  	selectorMap, err := metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
   260  	if err != nil {
   261  		return ctrl.Result{}, errors.Wrapf(err, "failed to convert MachineSet %q label selector to a map", machineSet.Name)
   262  	}
   263  
   264  	// Get all Machines linked to this MachineSet.
   265  	allMachines := &clusterv1.MachineList{}
   266  	err = r.Client.List(ctx,
   267  		allMachines,
   268  		client.InNamespace(machineSet.Namespace),
   269  		client.MatchingLabels(selectorMap),
   270  	)
   271  	if err != nil {
   272  		return ctrl.Result{}, errors.Wrap(err, "failed to list machines")
   273  	}
   274  
   275  	// Filter out irrelevant machines (i.e. IsControlledBy something else) and claim orphaned machines.
   276  	// Machines in deleted state are deliberately not excluded; see https://github.com/kubernetes-sigs/cluster-api/pull/3434.
   277  	filteredMachines := make([]*clusterv1.Machine, 0, len(allMachines.Items))
   278  	for idx := range allMachines.Items {
   279  		machine := &allMachines.Items[idx]
   280  		log := log.WithValues("Machine", klog.KObj(machine))
   281  		if shouldExcludeMachine(machineSet, machine) {
   282  			continue
   283  		}
   284  
   285  		// Attempt to adopt machine if it meets previous conditions and it has no controller references.
   286  		if metav1.GetControllerOf(machine) == nil {
   287  			if err := r.adoptOrphan(ctx, machineSet, machine); err != nil {
   288  				log.Error(err, "Failed to adopt Machine")
   289  				r.recorder.Eventf(machineSet, corev1.EventTypeWarning, "FailedAdopt", "Failed to adopt Machine %q: %v", machine.Name, err)
   290  				continue
   291  			}
   292  			log.Info("Adopted Machine")
   293  			r.recorder.Eventf(machineSet, corev1.EventTypeNormal, "SuccessfulAdopt", "Adopted Machine %q", machine.Name)
   294  		}
   295  
   296  		filteredMachines = append(filteredMachines, machine)
   297  	}
   298  
   299  	result := ctrl.Result{}
   300  
   301  	reconcileUnhealthyMachinesResult, err := r.reconcileUnhealthyMachines(ctx, cluster, machineSet, filteredMachines)
   302  	if err != nil {
   303  		return ctrl.Result{}, errors.Wrap(err, "failed to reconcile unhealthy machines")
   304  	}
   305  	result = util.LowestNonZeroResult(result, reconcileUnhealthyMachinesResult)
   306  
   307  	if err := r.syncMachines(ctx, machineSet, filteredMachines); err != nil {
   308  		return ctrl.Result{}, errors.Wrap(err, "failed to update Machines")
   309  	}
   310  
   311  	syncReplicasResult, syncErr := r.syncReplicas(ctx, cluster, machineSet, filteredMachines)
   312  	result = util.LowestNonZeroResult(result, syncReplicasResult)
   313  
   314  	// Always update the status as machines come up or die.
   315  	if err := r.updateStatus(ctx, cluster, machineSet, filteredMachines); err != nil {
   316  		return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate([]error{err, syncErr}), "failed to update MachineSet's Status")
   317  	}
   318  
   319  	if syncErr != nil {
   320  		return ctrl.Result{}, errors.Wrapf(syncErr, "failed to sync MachineSet replicas")
   321  	}
   322  
   323  	var replicas int32
   324  	if machineSet.Spec.Replicas != nil {
   325  		replicas = *machineSet.Spec.Replicas
   326  	}
   327  
   328  	// Resync the MachineSet after MinReadySeconds as a last line of defense to guard against clock-skew.
   329  	// Clock-skew is an issue as it may impact whether a ready replica is counted as an available replica.
   330  	// A replica is available once the amount of time since its last transition to the ready state
   331  	// exceeds MinReadySeconds; if there is clock skew, checking whether that amount of time
   332  	// exceeds MinReadySeconds could be incorrect.
   333  	// To avoid a ready replica getting stuck without being counted as available, we force a reconcile
   334  	// after MinReadySeconds, at which point any such replica should be confirmed as available.
   335  	if machineSet.Spec.MinReadySeconds > 0 &&
   336  		machineSet.Status.ReadyReplicas == replicas &&
   337  		machineSet.Status.AvailableReplicas != replicas {
   338  		minReadyResult := ctrl.Result{RequeueAfter: time.Duration(machineSet.Spec.MinReadySeconds) * time.Second}
   339  		result = util.LowestNonZeroResult(result, minReadyResult)
   340  		return result, nil
   341  	}
   342  
   343  	// Quickly reconcile until the nodes become Ready.
   344  	if machineSet.Status.ReadyReplicas != replicas {
   345  		result = util.LowestNonZeroResult(result, ctrl.Result{RequeueAfter: 15 * time.Second})
   346  		return result, nil
   347  	}
   348  
   349  	return result, nil
   350  }
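
        // util.LowestNonZeroResult, used above to merge partial results, keeps the more urgent of two
        // reconcile results: a zero result never wins over a non-zero one, and otherwise the lower
        // RequeueAfter is kept. A sketch with hypothetical values:
        //
        //	a := ctrl.Result{RequeueAfter: 15 * time.Second}
        //	b := ctrl.Result{RequeueAfter: time.Minute}
        //	merged := util.LowestNonZeroResult(a, b) // merged == ctrl.Result{RequeueAfter: 15 * time.Second}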
   351  
   352  // syncMachines updates Machines, InfrastructureMachine and BootstrapConfig to propagate in-place mutable fields
   353  // from the MachineSet.
   354  // Note: It also cleans up managed fields of all Machines so that Machines that were
   355  // created/patched before the controller adopted Server-Side-Apply (SSA) (i.e. before v1.4.0) can also work with SSA.
   356  // Note: For InfrastructureMachines and BootstrapConfigs it also drops ownership of "metadata.labels" and
   357  // "metadata.annotations" from "manager" so that "capi-machineset" can own these fields and can work with SSA.
   358  // Otherwise fields would be co-owned by our "old" "manager" and "capi-machineset" and then we would not be
   359  // able to e.g. drop labels and annotations.
   360  func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.MachineSet, machines []*clusterv1.Machine) error {
   361  	log := ctrl.LoggerFrom(ctx)
   362  	for i := range machines {
   363  		m := machines[i]
   364  		// If the machine is already being deleted, we don't need to update it.
   365  		if !m.DeletionTimestamp.IsZero() {
   366  			continue
   367  		}
   368  
   369  		// Cleanup managed fields of all Machines.
   370  		// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
   371  		// (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
   372  		// "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   373  		if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, machineSetManagerName); err != nil {
   374  			return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the Machine %q", m.Name)
   375  		}
   376  
   377  		// Update Machine to propagate in-place mutable fields from the MachineSet.
   378  		updatedMachine := r.computeDesiredMachine(machineSet, m)
   379  		err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: m})
   380  		if err != nil {
   381  			log.Error(err, "failed to update Machine", "Machine", klog.KObj(updatedMachine))
   382  			return errors.Wrapf(err, "failed to update Machine %q", klog.KObj(updatedMachine))
   383  		}
   384  		machines[i] = updatedMachine
   385  
   386  		infraMachine, err := external.Get(ctx, r.UnstructuredCachingClient, &updatedMachine.Spec.InfrastructureRef, updatedMachine.Namespace)
   387  		if err != nil {
   388  			return errors.Wrapf(err, "failed to get InfrastructureMachine %s",
   389  				klog.KRef(updatedMachine.Spec.InfrastructureRef.Namespace, updatedMachine.Spec.InfrastructureRef.Name))
   390  		}
   391  		// Cleanup managed fields of all InfrastructureMachines to drop ownership of labels and annotations
   392  		// from "manager". We do this so that InfrastructureMachines that are created using the Create method
   393  		// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   394  		// and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   395  		labelsAndAnnotationsManagedFieldPaths := []contract.Path{
   396  			{"f:metadata", "f:annotations"},
   397  			{"f:metadata", "f:labels"},
   398  		}
   399  		if err := ssa.DropManagedFields(ctx, r.Client, infraMachine, machineSetManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   400  			return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the InfrastructureMachine %s", klog.KObj(infraMachine))
   401  		}
   402  		// Update in-place mutating fields on InfrastructureMachine.
   403  		if err := r.updateExternalObject(ctx, infraMachine, machineSet); err != nil {
   404  			return errors.Wrapf(err, "failed to update InfrastructureMachine %s", klog.KObj(infraMachine))
   405  		}
   406  
   407  		if updatedMachine.Spec.Bootstrap.ConfigRef != nil {
   408  			bootstrapConfig, err := external.Get(ctx, r.UnstructuredCachingClient, updatedMachine.Spec.Bootstrap.ConfigRef, updatedMachine.Namespace)
   409  			if err != nil {
   410  				return errors.Wrapf(err, "failed to get BootstrapConfig %s",
   411  					klog.KRef(updatedMachine.Spec.Bootstrap.ConfigRef.Namespace, updatedMachine.Spec.Bootstrap.ConfigRef.Name))
   412  			}
   413  			// Cleanup managed fields of all BootstrapConfigs to drop ownership of labels and annotations
   414  			// from "manager". We do this so that BootstrapConfigs that are created using the Create method
   415  			// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
   416  			// and "capi-machineset" and then we would not be able to e.g. drop labels and annotations.
   417  			if err := ssa.DropManagedFields(ctx, r.Client, bootstrapConfig, machineSetManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
   418  				return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the BootstrapConfig %s", klog.KObj(bootstrapConfig))
   419  			}
   420  			// Update in-place mutating fields on BootstrapConfig.
   421  			if err := r.updateExternalObject(ctx, bootstrapConfig, machineSet); err != nil {
   422  				return errors.Wrapf(err, "failed to update BootstrapConfig %s", klog.KObj(bootstrapConfig))
   423  			}
   424  		}
   425  	}
   426  	return nil
   427  }
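
        // Why the managed-field cleanup in syncMachines matters: under SSA a field survives as long as any
        // manager still owns it, so if the legacy "manager" and "capi-machineset" co-owned a label, omitting
        // that label from the next apply would only drop this controller's ownership instead of deleting it.
        // An illustrative (hypothetical) co-ownership shape that the cleanup removes:
        //
        //	managedFields:
        //	- manager: manager
        //	  operation: Update
        //	  fieldsV1:
        //	    f:metadata:
        //	      f:labels: {}
        //	- manager: capi-machineset
        //	  operation: Apply
        //	  fieldsV1:
        //	    f:metadata:
        //	      f:labels: {}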
   428  
   429  // syncReplicas scales Machine resources up or down.
   430  func (r *Reconciler) syncReplicas(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, machines []*clusterv1.Machine) (ctrl.Result, error) {
   431  	log := ctrl.LoggerFrom(ctx)
   432  	if ms.Spec.Replicas == nil {
   433  		return ctrl.Result{}, errors.Errorf("the Replicas field in Spec for machineset %v is nil, this should not be allowed", ms.Name)
   434  	}
   435  	diff := len(machines) - int(*(ms.Spec.Replicas))
   436  	switch {
   437  	case diff < 0:
   438  		diff *= -1
   439  		log.Info(fmt.Sprintf("MachineSet is scaling up to %d replicas by creating %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines))
   440  		if ms.Annotations != nil {
   441  			if _, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok {
   442  				log.Info("Automatic creation of new machines disabled for machine set")
   443  				return ctrl.Result{}, nil
   444  			}
   445  		}
   446  
   447  		result, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up")
   448  		if err != nil || !result.IsZero() {
   449  			if err != nil {
   450  				// If the error is not nil, use it as the message for the condition.
   451  				preflightCheckErrMessage = err.Error()
   452  			}
   453  			conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.PreflightCheckFailedReason, clusterv1.ConditionSeverityError, preflightCheckErrMessage)
   454  			return result, err
   455  		}
   456  
   457  		var (
   458  			machineList []*clusterv1.Machine
   459  			errs        []error
   460  		)
   461  
   462  		for i := 0; i < diff; i++ {
   463  			// Shadow the outer logger so values added for this Machine do not leak into later iterations.
   464  			log := log
   465  			machine := r.computeDesiredMachine(ms, nil)
   466  			// Clone and set the infrastructure and bootstrap references.
   467  			var (
   468  				infraRef, bootstrapRef *corev1.ObjectReference
   469  				err                    error
   470  			)
   471  
   472  			// Create the BootstrapConfig if necessary.
   473  			if ms.Spec.Template.Spec.Bootstrap.ConfigRef != nil {
   474  				bootstrapRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
   475  					Client:      r.UnstructuredCachingClient,
   476  					TemplateRef: ms.Spec.Template.Spec.Bootstrap.ConfigRef,
   477  					Namespace:   machine.Namespace,
   478  					Name:        machine.Name,
   479  					ClusterName: machine.Spec.ClusterName,
   480  					Labels:      machine.Labels,
   481  					Annotations: machine.Annotations,
   482  					OwnerRef: &metav1.OwnerReference{
   483  						APIVersion: clusterv1.GroupVersion.String(),
   484  						Kind:       "MachineSet",
   485  						Name:       ms.Name,
   486  						UID:        ms.UID,
   487  					},
   488  				})
   489  				if err != nil {
   490  					conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.BootstrapTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
   491  					return ctrl.Result{}, errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a machine",
   492  						ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind,
   493  						klog.KRef(ms.Spec.Template.Spec.Bootstrap.ConfigRef.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name))
   494  				}
   495  				machine.Spec.Bootstrap.ConfigRef = bootstrapRef
   496  				log = log.WithValues(bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
   497  			}
   498  
   499  			// Create the InfraMachine.
   500  			infraRef, err = external.CreateFromTemplate(ctx, &external.CreateFromTemplateInput{
   501  				Client:      r.UnstructuredCachingClient,
   502  				TemplateRef: &ms.Spec.Template.Spec.InfrastructureRef,
   503  				Namespace:   machine.Namespace,
   504  				Name:        machine.Name,
   505  				ClusterName: machine.Spec.ClusterName,
   506  				Labels:      machine.Labels,
   507  				Annotations: machine.Annotations,
   508  				OwnerRef: &metav1.OwnerReference{
   509  					APIVersion: clusterv1.GroupVersion.String(),
   510  					Kind:       "MachineSet",
   511  					Name:       ms.Name,
   512  					UID:        ms.UID,
   513  				},
   514  			})
   515  			if err != nil {
   516  				conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.InfrastructureTemplateCloningFailedReason, clusterv1.ConditionSeverityError, err.Error())
   517  				return ctrl.Result{}, errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a machine",
   518  					ms.Spec.Template.Spec.InfrastructureRef.Kind,
   519  					klog.KRef(ms.Spec.Template.Spec.InfrastructureRef.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name))
   520  			}
   521  			log = log.WithValues(infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
   522  			machine.Spec.InfrastructureRef = *infraRef
   523  
   524  			// Create the Machine.
   525  			if err := ssa.Patch(ctx, r.Client, machineSetManagerName, machine); err != nil {
   526  				log.Error(err, "Error while creating a machine")
   527  				r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err)
   528  				errs = append(errs, err)
   529  				conditions.MarkFalse(ms, clusterv1.MachinesCreatedCondition, clusterv1.MachineCreationFailedReason,
   530  					clusterv1.ConditionSeverityError, err.Error())
   531  
   532  				// Try to cleanup the external objects if the Machine creation failed.
   533  				if err := r.Client.Delete(ctx, util.ObjectReferenceToUnstructured(*infraRef)); err != nil && !apierrors.IsNotFound(err) {
   534  					log.Error(err, "Failed to cleanup infrastructure machine object after Machine creation error", infraRef.Kind, klog.KRef(infraRef.Namespace, infraRef.Name))
   535  				}
   536  				if bootstrapRef != nil {
   537  					if err := r.Client.Delete(ctx, util.ObjectReferenceToUnstructured(*bootstrapRef)); err != nil && !apierrors.IsNotFound(err) {
   538  						log.Error(err, "Failed to cleanup bootstrap configuration object after Machine creation error", bootstrapRef.Kind, klog.KRef(bootstrapRef.Namespace, bootstrapRef.Name))
   539  					}
   540  				}
   541  				continue
   542  			}
   543  
   544  			log.Info(fmt.Sprintf("Created machine %d of %d", i+1, diff), "Machine", klog.KObj(machine))
   545  			r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulCreate", "Created machine %q", machine.Name)
   546  			machineList = append(machineList, machine)
   547  		}
   548  
   549  		if len(errs) > 0 {
   550  			return ctrl.Result{}, kerrors.NewAggregate(errs)
   551  		}
   552  		return ctrl.Result{}, r.waitForMachineCreation(ctx, machineList)
   553  	case diff > 0:
   554  		log.Info(fmt.Sprintf("MachineSet is scaling down to %d replicas by deleting %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines), "deletePolicy", ms.Spec.DeletePolicy)
   555  
   556  		deletePriorityFunc, err := getDeletePriorityFunc(ms)
   557  		if err != nil {
   558  			return ctrl.Result{}, err
   559  		}
   560  
   561  		var errs []error
   562  		machinesToDelete := getMachinesToDeletePrioritized(machines, diff, deletePriorityFunc)
   563  		for i, machine := range machinesToDelete {
   564  			log := log.WithValues("Machine", klog.KObj(machine))
   565  			if machine.GetDeletionTimestamp().IsZero() {
   566  				log.Info(fmt.Sprintf("Deleting machine %d of %d", i+1, diff))
   567  				if err := r.Client.Delete(ctx, machine); err != nil {
   568  					log.Error(err, "Unable to delete Machine")
   569  					r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedDelete", "Failed to delete machine %q: %v", machine.Name, err)
   570  					errs = append(errs, err)
   571  					continue
   572  				}
   573  				r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted machine %q", machine.Name)
   574  			} else {
   575  				log.Info(fmt.Sprintf("Waiting for machine %d of %d to be deleted", i+1, diff))
   576  			}
   577  		}
   578  
   579  		if len(errs) > 0 {
   580  			return ctrl.Result{}, kerrors.NewAggregate(errs)
   581  		}
   582  		return ctrl.Result{}, r.waitForMachineDeletion(ctx, machinesToDelete)
   583  	}
   584  
   585  	return ctrl.Result{}, nil
   586  }
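
        // Machine creation can be disabled independently of pausing the whole MachineSet: the scale-up branch
        // above returns early whenever the clusterv1.DisableMachineCreateAnnotation key is present (its value
        // is not inspected), while scale-down keeps working. A minimal sketch:
        //
        //	if ms.Annotations == nil {
        //		ms.Annotations = map[string]string{}
        //	}
        //	ms.Annotations[clusterv1.DisableMachineCreateAnnotation] = "" // presence alone disables creation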
   587  
   588  // computeDesiredMachine computes the desired Machine.
   589  // This Machine will be used during reconciliation to:
   590  // * create a Machine
   591  // * update an existing Machine
   592  // Because we are using Server-Side-Apply we always have to calculate the full object.
   593  // There are small differences in how we calculate the Machine depending on if it
   594  // is a create or update. Example: for a new Machine we have to calculate a new name,
   595  // while for an existing Machine we have to use the name of the existing Machine.
   596  func (r *Reconciler) computeDesiredMachine(machineSet *clusterv1.MachineSet, existingMachine *clusterv1.Machine) *clusterv1.Machine {
   597  	desiredMachine := &clusterv1.Machine{
   598  		TypeMeta: metav1.TypeMeta{
   599  			APIVersion: clusterv1.GroupVersion.String(),
   600  			Kind:       "Machine",
   601  		},
   602  		ObjectMeta: metav1.ObjectMeta{
   603  			Name:      names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-", machineSet.Name)),
   604  			Namespace: machineSet.Namespace,
   605  			// Note: By setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine.
   606  			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(machineSet, machineSetKind)},
   607  			Labels:          map[string]string{},
   608  			Annotations:     map[string]string{},
   609  			Finalizers:      []string{clusterv1.MachineFinalizer},
   610  		},
   611  		Spec: *machineSet.Spec.Template.Spec.DeepCopy(),
   612  	}
   613  	// Set ClusterName.
   614  	desiredMachine.Spec.ClusterName = machineSet.Spec.ClusterName
   615  
   616  	// Clean up the refs to the incorrect objects.
   617  	// The InfrastructureRef and the Bootstrap.ConfigRef in Machine should point to the InfrastructureMachine
   618  	// and the BootstrapConfig objects. In the MachineSet these values point to InfrastructureMachineTemplate
   619  	// and the BootstrapConfig objects. In the MachineSet these values point to the InfrastructureMachineTemplate
   620  	// and the BootstrapConfigTemplate. Drop the values that were copied over from the MachineSet during DeepCopy
   621  	// Note: During Machine creation, these refs will be updated with the correct values after the corresponding
   622  	// objects are created.
   623  	desiredMachine.Spec.InfrastructureRef = corev1.ObjectReference{}
   624  	desiredMachine.Spec.Bootstrap.ConfigRef = nil
   625  
   626  	// If we are updating an existing Machine reuse the name, uid, infrastructureRef and bootstrap.configRef
   627  	// from the existingMachine.
   628  	// Note: we use UID to force SSA to update the existing Machine and to not accidentally create a new Machine.
   629  	// infrastructureRef and bootstrap.configRef remain the same for an existing Machine.
   630  	if existingMachine != nil {
   631  		desiredMachine.SetName(existingMachine.Name)
   632  		desiredMachine.SetUID(existingMachine.UID)
   633  		desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
   634  		desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
   635  	}
   636  
   637  	// Set the in-place mutable fields.
   638  	// When we create a new Machine we will just create the Machine with those fields.
   639  	// When we update an existing Machine we will update the fields on the existing Machine (in-place mutate).
   640  
   641  	// Set Labels
   642  	desiredMachine.Labels = machineLabelsFromMachineSet(machineSet)
   643  
   644  	// Set Annotations
   645  	desiredMachine.Annotations = machineAnnotationsFromMachineSet(machineSet)
   646  
   647  	// Set all other in-place mutable fields.
   648  	desiredMachine.Spec.NodeDrainTimeout = machineSet.Spec.Template.Spec.NodeDrainTimeout
   649  	desiredMachine.Spec.NodeDeletionTimeout = machineSet.Spec.Template.Spec.NodeDeletionTimeout
   650  	desiredMachine.Spec.NodeVolumeDetachTimeout = machineSet.Spec.Template.Spec.NodeVolumeDetachTimeout
   651  
   652  	return desiredMachine
   653  }
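
        // names.SimpleNameGenerator appends a short random suffix to the supplied base, so new Machines get
        // names derived from the MachineSet name. A sketch (the suffix is random; the value shown is
        // hypothetical):
        //
        //	names.SimpleNameGenerator.GenerateName("my-machineset-") // e.g. "my-machineset-7xk2p"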
   654  
   655  // updateExternalObject updates the external object passed in with the
   656  // updated labels and annotations from the MachineSet.
   657  func (r *Reconciler) updateExternalObject(ctx context.Context, obj client.Object, machineSet *clusterv1.MachineSet) error {
   658  	updatedObject := &unstructured.Unstructured{}
   659  	updatedObject.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())
   660  	updatedObject.SetNamespace(obj.GetNamespace())
   661  	updatedObject.SetName(obj.GetName())
   662  	// Set the UID to ensure that Server-Side-Apply only performs an update
   663  	// and does not perform an accidental create.
   664  	updatedObject.SetUID(obj.GetUID())
   665  
   666  	updatedObject.SetLabels(machineLabelsFromMachineSet(machineSet))
   667  	updatedObject.SetAnnotations(machineAnnotationsFromMachineSet(machineSet))
   668  
   669  	if err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedObject, ssa.WithCachingProxy{Cache: r.ssaCache, Original: obj}); err != nil {
   670  		return errors.Wrapf(err, "failed to update %s", klog.KObj(obj))
   671  	}
   672  	return nil
   673  }
   674  
   675  // machineLabelsFromMachineSet computes the labels the Machine created from this MachineSet should have.
   676  func machineLabelsFromMachineSet(machineSet *clusterv1.MachineSet) map[string]string {
   677  	machineLabels := map[string]string{}
   678  	// Note: We can't just set `machineSet.Spec.Template.Labels` directly and thus "share" the labels
   679  	// map between Machine and machineSet.Spec.Template.Labels. This would mean that adding the
   680  	// MachineSetNameLabel and MachineDeploymentNameLabel later on the Machine would also add the labels
   681  	// to machineSet.Spec.Template.Labels and thus modify the labels of the MachineSet.
   682  	for k, v := range machineSet.Spec.Template.Labels {
   683  		machineLabels[k] = v
   684  	}
   685  	// Always set the MachineSetNameLabel.
   686  	// Note: If a client tries to create a MachineSet without a selector, the MachineSet webhook
   687  	// will add this label automatically. But we want this label to always be present even if the MachineSet
   688  	// has a selector which doesn't include it. Therefore, we have to set it here explicitly.
   689  	machineLabels[clusterv1.MachineSetNameLabel] = format.MustFormatValue(machineSet.Name)
   690  	// Propagate the MachineDeploymentNameLabel from MachineSet to Machine if it exists.
   691  	if mdName, ok := machineSet.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
   692  		machineLabels[clusterv1.MachineDeploymentNameLabel] = mdName
   693  	}
   694  	return machineLabels
   695  }
   696  
   697  // machineAnnotationsFromMachineSet computes the annotations the Machine created from this MachineSet should have.
   698  func machineAnnotationsFromMachineSet(machineSet *clusterv1.MachineSet) map[string]string {
   699  	annotations := map[string]string{}
   700  	for k, v := range machineSet.Spec.Template.Annotations {
   701  		annotations[k] = v
   702  	}
   703  	return annotations
   704  }
   705  
   706  // shouldExcludeMachine returns true if the machine should be filtered out, false otherwise.
   707  func shouldExcludeMachine(machineSet *clusterv1.MachineSet, machine *clusterv1.Machine) bool {
   708  	if metav1.GetControllerOf(machine) != nil && !metav1.IsControlledBy(machine, machineSet) {
   709  		return true
   710  	}
   711  
   712  	return false
   713  }
   714  
   715  // adoptOrphan sets the MachineSet as a controller OwnerReference to the Machine.
   716  func (r *Reconciler) adoptOrphan(ctx context.Context, machineSet *clusterv1.MachineSet, machine *clusterv1.Machine) error {
   717  	patch := client.MergeFrom(machine.DeepCopy())
   718  	newRef := *metav1.NewControllerRef(machineSet, machineSetKind)
   719  	machine.SetOwnerReferences(util.EnsureOwnerRef(machine.GetOwnerReferences(), newRef))
   720  	return r.Client.Patch(ctx, machine, patch)
   721  }
   722  
   723  func (r *Reconciler) waitForMachineCreation(ctx context.Context, machineList []*clusterv1.Machine) error {
   724  	log := ctrl.LoggerFrom(ctx)
   725  
   726  	for i := 0; i < len(machineList); i++ {
   727  		machine := machineList[i]
   728  		pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) {
   729  			key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name}
   730  			if err := r.Client.Get(ctx, key, &clusterv1.Machine{}); err != nil {
   731  				if apierrors.IsNotFound(err) {
   732  					return false, nil
   733  				}
   734  				return false, err
   735  			}
   736  
   737  			return true, nil
   738  		})
   739  
   740  		if pollErr != nil {
   741  			log.Error(pollErr, "Failed waiting for machine object to be created")
   742  			return errors.Wrap(pollErr, "failed waiting for machine object to be created")
   743  		}
   744  	}
   745  
   746  	return nil
   747  }
   748  
   749  func (r *Reconciler) waitForMachineDeletion(ctx context.Context, machineList []*clusterv1.Machine) error {
   750  	log := ctrl.LoggerFrom(ctx)
   751  
   752  	for i := 0; i < len(machineList); i++ {
   753  		machine := machineList[i]
   754  		pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) {
   755  			m := &clusterv1.Machine{}
   756  			key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name}
   757  			err := r.Client.Get(ctx, key, m)
   758  			if apierrors.IsNotFound(err) || !m.DeletionTimestamp.IsZero() {
   759  				return true, nil
   760  			}
   761  			return false, err
   762  		})
   763  
   764  		if pollErr != nil {
   765  			log.Error(pollErr, "Failed waiting for machine object to be deleted")
   766  			return errors.Wrap(pollErr, "failed waiting for machine object to be deleted")
   767  		}
   768  	}
   769  	return nil
   770  }
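
        // Both wait helpers above poll the controller's (cached) client until it has observed the created or
        // deleted Machines, bounded by stateConfirmationTimeout in steps of stateConfirmationInterval. This
        // keeps the next reconcile from acting on a stale cache, e.g. re-creating replicas whose Machine
        // objects already exist but have not yet landed in the informer cache.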
   771  
   772  // MachineToMachineSets is a handler.MapFunc to be used to enqueue requests for reconciliation
   773  // for MachineSets that might adopt an orphaned Machine.
   774  func (r *Reconciler) MachineToMachineSets(ctx context.Context, o client.Object) []ctrl.Request {
   775  	result := []ctrl.Request{}
   776  
   777  	m, ok := o.(*clusterv1.Machine)
   778  	if !ok {
   779  		panic(fmt.Sprintf("Expected a Machine but got a %T", o))
   780  	}
   781  
   782  	log := ctrl.LoggerFrom(ctx, "Machine", klog.KObj(m))
   783  
   784  	// Check if the controller reference is already set and
   785  	// return an empty result when one is found.
   786  	for _, ref := range m.ObjectMeta.GetOwnerReferences() {
   787  		if ref.Controller != nil && *ref.Controller {
   788  			return result
   789  		}
   790  	}
   791  
   792  	mss, err := r.getMachineSetsForMachine(ctx, m)
   793  	if err != nil {
   794  		log.Error(err, "Failed getting MachineSets for Machine")
   795  		return nil
   796  	}
   797  	if len(mss) == 0 {
   798  		return nil
   799  	}
   800  
   801  	for _, ms := range mss {
   802  		name := client.ObjectKey{Namespace: ms.Namespace, Name: ms.Name}
   803  		result = append(result, ctrl.Request{NamespacedName: name})
   804  	}
   805  
   806  	return result
   807  }
   808  
   809  func (r *Reconciler) getMachineSetsForMachine(ctx context.Context, m *clusterv1.Machine) ([]*clusterv1.MachineSet, error) {
   810  	if len(m.Labels) == 0 {
   811  		return nil, fmt.Errorf("machine %v has no labels, this is unexpected", client.ObjectKeyFromObject(m))
   812  	}
   813  
   814  	msList := &clusterv1.MachineSetList{}
   815  	if err := r.Client.List(ctx, msList, client.InNamespace(m.Namespace)); err != nil {
   816  		return nil, errors.Wrapf(err, "failed to list MachineSets")
   817  	}
   818  
   819  	var mss []*clusterv1.MachineSet
   820  	for idx := range msList.Items {
   821  		ms := &msList.Items[idx]
   822  		if machine.HasMatchingLabels(ms.Spec.Selector, m.Labels) {
   823  			mss = append(mss, ms)
   824  		}
   825  	}
   826  
   827  	return mss, nil
   828  }
   829  
   830  // shouldAdopt returns true if the MachineSet should be adopted as a stand-alone MachineSet directly owned by the Cluster.
   831  func (r *Reconciler) shouldAdopt(ms *clusterv1.MachineSet) bool {
   832  	// If the MachineSet is controlled by a MachineDeployment, or if it is a stand-alone MachineSet directly owned by the Cluster, then no-op.
   833  	if util.HasOwner(ms.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"MachineDeployment", "Cluster"}) {
   834  		return false
   835  	}
   836  
   837  	// If the MachineSet originates from a MachineDeployment object, it should not be adopted directly by the Cluster as a stand-alone MachineSet.
   838  	// Note: this is required because after restore from a backup both the MachineSet controller and the
   839  	// MachineDeployment controller are racing to adopt MachineSets, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
   840  	if _, ok := ms.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
   841  		return false
   842  	}
   843  	return true
   844  }
   845  
   846  // updateStatus updates the Status field for the MachineSet.
   847  // It checks the current state of the replicas and updates the Status of the MachineSet accordingly.
   848  func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) error {
   849  	log := ctrl.LoggerFrom(ctx)
   850  	newStatus := ms.Status.DeepCopy()
   851  
   852  	// Copy label selector to its status counterpart in string format.
   853  	// This is necessary for CRDs including scale subresources.
   854  	selector, err := metav1.LabelSelectorAsSelector(&ms.Spec.Selector)
   855  	if err != nil {
   856  		return errors.Wrapf(err, "failed to update status for MachineSet %s/%s", ms.Namespace, ms.Name)
   857  	}
   858  	newStatus.Selector = selector.String()
   859  
   860  	// Count the machines whose labels match the labels of the MachineSet's machine
   861  	// template; a matching machine may carry more labels than the template specifies.
   862  	// Because the template labels are a superset of the MachineSet's selector, any
   863  	// machine matching the template labels is necessarily part of
   864  	// filteredMachines.
   865  	fullyLabeledReplicasCount := 0
   866  	readyReplicasCount := 0
   867  	availableReplicasCount := 0
   868  	desiredReplicas := *ms.Spec.Replicas
   869  	templateLabel := labels.Set(ms.Spec.Template.Labels).AsSelectorPreValidated()
   870  
   871  	for _, machine := range filteredMachines {
   872  		log := log.WithValues("Machine", klog.KObj(machine))
   873  
   874  		if templateLabel.Matches(labels.Set(machine.Labels)) {
   875  			fullyLabeledReplicasCount++
   876  		}
   877  
   878  		if machine.Status.NodeRef == nil {
   879  			log.V(4).Info("Waiting for the machine controller to set status.NodeRef on the Machine")
   880  			continue
   881  		}
   882  
   883  		node, err := r.getMachineNode(ctx, cluster, machine)
   884  		if err != nil && machine.GetDeletionTimestamp().IsZero() {
   885  			log.Error(err, "Unable to retrieve Node status", "node", klog.KRef("", machine.Status.NodeRef.Name))
   886  			continue
   887  		}
   888  
   889  		if noderefutil.IsNodeReady(node) {
   890  			readyReplicasCount++
   891  			if noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now()) {
   892  				availableReplicasCount++
   893  			}
   894  		} else if machine.GetDeletionTimestamp().IsZero() {
   895  			log.V(4).Info("Waiting for the Kubernetes node on the machine to report ready state")
   896  		}
   897  	}
   898  
   899  	newStatus.Replicas = int32(len(filteredMachines))
   900  	newStatus.FullyLabeledReplicas = int32(fullyLabeledReplicasCount)
   901  	newStatus.ReadyReplicas = int32(readyReplicasCount)
   902  	newStatus.AvailableReplicas = int32(availableReplicasCount)
   903  
   904  	// Copy the newly calculated status into the machineset
   905  	if ms.Status.Replicas != newStatus.Replicas ||
   906  		ms.Status.FullyLabeledReplicas != newStatus.FullyLabeledReplicas ||
   907  		ms.Status.ReadyReplicas != newStatus.ReadyReplicas ||
   908  		ms.Status.AvailableReplicas != newStatus.AvailableReplicas ||
   909  		ms.Generation != ms.Status.ObservedGeneration {
   910  		log.V(4).Info("Updating status: " +
   911  			fmt.Sprintf("replicas %d->%d (need %d), ", ms.Status.Replicas, newStatus.Replicas, desiredReplicas) +
   912  			fmt.Sprintf("fullyLabeledReplicas %d->%d, ", ms.Status.FullyLabeledReplicas, newStatus.FullyLabeledReplicas) +
   913  			fmt.Sprintf("readyReplicas %d->%d, ", ms.Status.ReadyReplicas, newStatus.ReadyReplicas) +
   914  			fmt.Sprintf("availableReplicas %d->%d, ", ms.Status.AvailableReplicas, newStatus.AvailableReplicas) +
   915  			fmt.Sprintf("observedGeneration %v->%v", ms.Status.ObservedGeneration, ms.Generation))
   916  
   917  		// Save the generation number we acted on, otherwise we might wrongfully indicate
   918  		// that we've seen a spec update when we retry.
   919  		newStatus.ObservedGeneration = ms.Generation
   920  		newStatus.DeepCopyInto(&ms.Status)
   921  	}
   922  	switch {
   923  	// We are scaling up
   924  	case newStatus.Replicas < desiredReplicas:
   925  		conditions.MarkFalse(ms, clusterv1.ResizedCondition, clusterv1.ScalingUpReason, clusterv1.ConditionSeverityWarning, "Scaling up MachineSet to %d replicas (actual %d)", desiredReplicas, newStatus.Replicas)
   926  	// We are scaling down
   927  	case newStatus.Replicas > desiredReplicas:
   928  		conditions.MarkFalse(ms, clusterv1.ResizedCondition, clusterv1.ScalingDownReason, clusterv1.ConditionSeverityWarning, "Scaling down MachineSet to %d replicas (actual %d)", desiredReplicas, newStatus.Replicas)
   929  		// This means that there was no error in generating the desired number of machine objects
   930  		conditions.MarkTrue(ms, clusterv1.MachinesCreatedCondition)
   931  	default:
   932  		// Make sure last resize operation is marked as completed.
   933  		// NOTE: we are checking the number of machines ready so we report resize completed only when the machines
   934  		// are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
   935  		if newStatus.ReadyReplicas == newStatus.Replicas {
   936  			if conditions.IsFalse(ms, clusterv1.ResizedCondition) {
   937  				log.Info("All the replicas are ready", "replicas", newStatus.ReadyReplicas)
   938  			}
   939  			conditions.MarkTrue(ms, clusterv1.ResizedCondition)
   940  		}
   941  		// This means that there was no error in generating the desired number of machine objects
   942  		conditions.MarkTrue(ms, clusterv1.MachinesCreatedCondition)
   943  	}
   944  
   945  	// Aggregate the operational state of all the machines; while aggregating we are adding the
   946  	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
   947  	conditions.SetAggregate(ms, clusterv1.MachinesReadyCondition, collections.FromMachines(filteredMachines...).ConditionGetters(), conditions.AddSourceRef())
   948  
   949  	return nil
   950  }
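
        // A worked example of the ready vs. available accounting above, with hypothetical values: given
        // ms.Spec.MinReadySeconds = 30 and a Node that transitioned to ready 10 seconds ago,
        //
        //	noderefutil.IsNodeReady(node)                                             // true: counts as ready
        //	noderefutil.IsNodeAvailable(node, ms.Spec.MinReadySeconds, metav1.Now())  // false until 30s elapse
        //
        // so the replica is counted in ReadyReplicas immediately but only joins AvailableReplicas once the
        // Node has been ready for at least MinReadySeconds.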
   951  
   952  func (r *Reconciler) getMachineNode(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (*corev1.Node, error) {
   953  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   954  	if err != nil {
   955  		return nil, err
   956  	}
   957  	node := &corev1.Node{}
   958  	if err := remoteClient.Get(ctx, client.ObjectKey{Name: machine.Status.NodeRef.Name}, node); err != nil {
   959  		return nil, errors.Wrapf(err, "error retrieving node %s for machine %s/%s", machine.Status.NodeRef.Name, machine.Namespace, machine.Name)
   960  	}
   961  	return node, nil
   962  }
   963  
   964  func (r *Reconciler) reconcileUnhealthyMachines(ctx context.Context, cluster *clusterv1.Cluster, ms *clusterv1.MachineSet, filteredMachines []*clusterv1.Machine) (ctrl.Result, error) {
   965  	log := ctrl.LoggerFrom(ctx)
   966  	// List all unhealthy machines.
   967  	machinesToRemediate := make([]*clusterv1.Machine, 0, len(filteredMachines))
   968  	for _, m := range filteredMachines {
   969  		// filteredMachines deliberately includes Machines in deleting status so that the replica counts stay correct;
   970  		// skip remediation for those Machines.
   971  		if !m.DeletionTimestamp.IsZero() {
   972  			continue
   973  		}
   974  		if conditions.IsFalse(m, clusterv1.MachineOwnerRemediatedCondition) {
   975  			machinesToRemediate = append(machinesToRemediate, m)
   976  		}
   977  	}
   978  
   979  	// If there are no machines to remediate return early.
   980  	if len(machinesToRemediate) == 0 {
   981  		return ctrl.Result{}, nil
   982  	}
   983  
   984  	preflightChecksResult, preflightCheckErrMessage, err := r.runPreflightChecks(ctx, cluster, ms, "Machine Remediation")
   985  	if err != nil {
   986  		// If err is not nil, use its message as the preflightCheckErrMessage.
   987  		preflightCheckErrMessage = err.Error()
   988  	}
   989  
   990  	preflightChecksFailed := err != nil || !preflightChecksResult.IsZero()
   991  	if preflightChecksFailed {
   992  		// PreflightChecks did not pass. Update the MachineOwnerRemediated condition on the unhealthy Machines with
   993  		// WaitingForRemediationReason reason.
   994  		var errs []error
   995  		for _, m := range machinesToRemediate {
   996  			patchHelper, err := patch.NewHelper(m, r.Client)
   997  			if err != nil {
   998  				errs = append(errs, err)
   999  				continue
  1000  			}
  1001  			conditions.MarkFalse(m, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, preflightCheckErrMessage)
  1002  			if err := patchHelper.Patch(ctx, m); err != nil {
  1003  				errs = append(errs, err)
  1004  			}
  1005  		}
  1006  
  1007  		if len(errs) > 0 {
  1008  			return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to patch unhealthy Machines")
  1009  		}
  1010  		return preflightChecksResult, nil
  1011  	}
  1012  
  1013  	// PreflightChecks passed, so it is safe to remediate unhealthy machines.
  1014  	// Remediate unhealthy machines by deleting them.
  1015  	var errs []error
  1016  	for _, m := range machinesToRemediate {
  1017  		log.Info(fmt.Sprintf("Deleting Machine %s because it was marked as unhealthy by the MachineHealthCheck controller", klog.KObj(m)))
  1018  		patch := client.MergeFrom(m.DeepCopy())
  1019  		if err := r.Client.Delete(ctx, m); err != nil {
  1020  			errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
  1021  			continue
  1022  		}
  1023  		conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition)
  1024  		if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) {
  1025  			errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m)))
  1026  		}
  1027  	}
  1028  
  1029  	if len(errs) > 0 {
  1030  		return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines")
  1031  	}
  1032  
  1033  	return ctrl.Result{}, nil
  1034  }
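
        // The remediation handshake above is driven by a single condition: the MachineHealthCheck controller
        // marks MachineOwnerRemediatedCondition False on an unhealthy Machine, and this controller deletes the
        // Machine and flips the condition to True once the deletion has been issued. Illustratively (field
        // values hypothetical), an unhealthy Machine awaiting remediation carries:
        //
        //	conditions:
        //	- type: OwnerRemediated
        //	  status: "False"
        //	  severity: Warning
        //	  reason: WaitingForRemediation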
  1035  
  1036  func reconcileExternalTemplateReference(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, ref *corev1.ObjectReference) error {
  1037  	if !strings.HasSuffix(ref.Kind, clusterv1.TemplateSuffix) {
  1038  		return nil
  1039  	}
  1040  
  1041  	if err := utilconversion.UpdateReferenceAPIContract(ctx, c, ref); err != nil {
  1042  		return err
  1043  	}
  1044  
  1045  	obj, err := external.Get(ctx, c, ref, cluster.Namespace)
  1046  	if err != nil {
  1047  		return err
  1048  	}
  1049  
  1050  	patchHelper, err := patch.NewHelper(obj, c)
  1051  	if err != nil {
  1052  		return err
  1053  	}
  1054  
  1055  	obj.SetOwnerReferences(util.EnsureOwnerRef(obj.GetOwnerReferences(), metav1.OwnerReference{
  1056  		APIVersion: clusterv1.GroupVersion.String(),
  1057  		Kind:       "Cluster",
  1058  		Name:       cluster.Name,
  1059  		UID:        cluster.UID,
  1060  	}))
  1061  
  1062  	return patchHelper.Patch(ctx, obj)
  1063  }
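
        // reconcileExternalTemplateReference only acts on template kinds: clusterv1.TemplateSuffix is the
        // "Template" kind suffix, so a ref to e.g. a "DockerMachineTemplate" gets the API-contract update and
        // Cluster ownerRef above, while a ref to a "DockerMachine" is skipped. A sketch:
        //
        //	strings.HasSuffix("DockerMachineTemplate", clusterv1.TemplateSuffix) // true  -> reconciled
        //	strings.HasSuffix("DockerMachine", clusterv1.TemplateSuffix)         // false -> early return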