sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/machine/machine_controller.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package machine
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	corev1 "k8s.io/api/core/v1"
    26  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	"k8s.io/client-go/kubernetes"
    33  	"k8s.io/client-go/rest"
    34  	"k8s.io/client-go/tools/record"
    35  	"k8s.io/klog/v2"
    36  	kubedrain "k8s.io/kubectl/pkg/drain"
    37  	ctrl "sigs.k8s.io/controller-runtime"
    38  	"sigs.k8s.io/controller-runtime/pkg/builder"
    39  	"sigs.k8s.io/controller-runtime/pkg/client"
    40  	"sigs.k8s.io/controller-runtime/pkg/controller"
    41  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    42  	"sigs.k8s.io/controller-runtime/pkg/handler"
    43  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    44  
    45  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    46  	"sigs.k8s.io/cluster-api/api/v1beta1/index"
    47  	"sigs.k8s.io/cluster-api/controllers/external"
    48  	"sigs.k8s.io/cluster-api/controllers/noderefutil"
    49  	"sigs.k8s.io/cluster-api/controllers/remote"
    50  	"sigs.k8s.io/cluster-api/internal/util/ssa"
    51  	"sigs.k8s.io/cluster-api/util"
    52  	"sigs.k8s.io/cluster-api/util/annotations"
    53  	"sigs.k8s.io/cluster-api/util/collections"
    54  	"sigs.k8s.io/cluster-api/util/conditions"
    55  	clog "sigs.k8s.io/cluster-api/util/log"
    56  	"sigs.k8s.io/cluster-api/util/patch"
    57  	"sigs.k8s.io/cluster-api/util/predicates"
    58  )
    59  
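         // Sentinel errors used to signal why deleting the Machine's Node is skipped; reconcileDelete logs the cause instead of failing.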
    60  var (
    61  	errNilNodeRef                 = errors.New("noderef is nil")
    62  	errLastControlPlaneNode       = errors.New("last control plane member")
    63  	errNoControlPlaneNodes        = errors.New("no control plane members")
    64  	errClusterIsBeingDeleted      = errors.New("cluster is being deleted")
    65  	errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
    66  )
    67  
    68  // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
    69  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
    70  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
    71  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
    72  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status;machines/finalizers,verbs=get;list;watch;create;update;patch;delete
    73  // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
    74  
    75  // Reconciler reconciles a Machine object.
    76  type Reconciler struct {
    77  	Client                    client.Client
    78  	UnstructuredCachingClient client.Client
    79  	APIReader                 client.Reader
    80  	Tracker                   *remote.ClusterCacheTracker
    81  
    82  	// WatchFilterValue is the label value used to filter events prior to reconciliation.
    83  	WatchFilterValue string
    84  
     85  	// NodeDrainClientTimeout is the timeout of the client used for draining nodes.
    86  	NodeDrainClientTimeout time.Duration
    87  
    88  	controller      controller.Controller
    89  	recorder        record.EventRecorder
    90  	externalTracker external.ObjectTracker
    91  
    92  	// nodeDeletionRetryTimeout determines how long the controller will retry deleting a node
    93  	// during a single reconciliation.
    94  	nodeDeletionRetryTimeout time.Duration
    95  	ssaCache                 ssa.Cache
    96  }
    97  
    98  func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    99  	clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
   100  	if err != nil {
   101  		return err
   102  	}
   103  
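         	// Default nodeDeletionRetryTimeout so Node deletion during a single reconcile is retried for a bounded amount of time.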
   104  	if r.nodeDeletionRetryTimeout.Nanoseconds() == 0 {
   105  		r.nodeDeletionRetryTimeout = 10 * time.Second
   106  	}
   107  
   108  	c, err := ctrl.NewControllerManagedBy(mgr).
   109  		For(&clusterv1.Machine{}).
   110  		WithOptions(options).
   111  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
   112  		Watches(
   113  			&clusterv1.Cluster{},
   114  			handler.EnqueueRequestsFromMapFunc(clusterToMachines),
   115  			builder.WithPredicates(
   116  				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
   117  				predicates.All(ctrl.LoggerFrom(ctx),
   118  					predicates.Any(ctrl.LoggerFrom(ctx),
   119  						predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
   120  						predicates.ClusterControlPlaneInitialized(ctrl.LoggerFrom(ctx)),
   121  					),
   122  					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
   123  				),
   124  			)).
   125  		Build(r)
   126  	if err != nil {
   127  		return errors.Wrap(err, "failed setting up with a controller manager")
   128  	}
   129  
   130  	r.controller = c
   131  	r.recorder = mgr.GetEventRecorderFor("machine-controller")
   132  	r.externalTracker = external.ObjectTracker{
   133  		Controller: c,
   134  		Cache:      mgr.GetCache(),
   135  	}
   136  	r.ssaCache = ssa.NewCache()
   137  	return nil
   138  }
   139  
   140  func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   141  	// Fetch the Machine instance
   142  	m := &clusterv1.Machine{}
   143  	if err := r.Client.Get(ctx, req.NamespacedName, m); err != nil {
   144  		if apierrors.IsNotFound(err) {
   145  			// Object not found, return.  Created objects are automatically garbage collected.
   146  			// For additional cleanup logic use finalizers.
   147  			return ctrl.Result{}, nil
   148  		}
   149  
   150  		// Error reading the object - requeue the request.
   151  		return ctrl.Result{}, err
   152  	}
   153  
   154  	// AddOwners adds the owners of Machine as k/v pairs to the logger.
   155  	// Specifically, it will add KubeadmControlPlane, MachineSet and MachineDeployment.
   156  	ctx, log, err := clog.AddOwners(ctx, r.Client, m)
   157  	if err != nil {
   158  		return ctrl.Result{}, err
   159  	}
   160  
   161  	log = log.WithValues("Cluster", klog.KRef(m.ObjectMeta.Namespace, m.Spec.ClusterName))
   162  	ctx = ctrl.LoggerInto(ctx, log)
   163  
   164  	cluster, err := util.GetClusterByName(ctx, r.Client, m.ObjectMeta.Namespace, m.Spec.ClusterName)
   165  	if err != nil {
   166  		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machine %q in namespace %q",
   167  			m.Spec.ClusterName, m.Name, m.Namespace)
   168  	}
   169  
   170  	// Return early if the object or Cluster is paused.
   171  	if annotations.IsPaused(cluster, m) {
   172  		log.Info("Reconciliation is paused for this object")
   173  		return ctrl.Result{}, nil
   174  	}
   175  
   176  	// Initialize the patch helper
   177  	patchHelper, err := patch.NewHelper(m, r.Client)
   178  	if err != nil {
   179  		return ctrl.Result{}, err
   180  	}
   181  
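         	// Always patch the Machine (including its phase and status) when this function returns, even on error or early return.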
   182  	defer func() {
   183  		r.reconcilePhase(ctx, m)
   184  
   185  		// Always attempt to patch the object and status after each reconciliation.
   186  		// Patch ObservedGeneration only if the reconciliation completed successfully
   187  		patchOpts := []patch.Option{}
   188  		if reterr == nil {
   189  			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
   190  		}
   191  		if err := patchMachine(ctx, patchHelper, m, patchOpts...); err != nil {
   192  			reterr = kerrors.NewAggregate([]error{reterr, err})
   193  		}
   194  	}()
   195  
   196  	// Reconcile labels.
   197  	if m.Labels == nil {
   198  		m.Labels = make(map[string]string)
   199  	}
   200  	m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName
   201  
   202  	// Handle deletion reconciliation loop.
   203  	if !m.ObjectMeta.DeletionTimestamp.IsZero() {
   204  		res, err := r.reconcileDelete(ctx, cluster, m)
    205  		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
    206  		// the current cluster due to concurrent access.
   207  		if errors.Is(err, remote.ErrClusterLocked) {
   208  			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   209  			return ctrl.Result{Requeue: true}, nil
   210  		}
   211  		return res, err
   212  	}
   213  
   214  	// Add finalizer first if not set to avoid the race condition between init and delete.
   215  	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
   216  	if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) {
   217  		controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer)
   218  		return ctrl.Result{}, nil
   219  	}
   220  
   221  	// Handle normal reconciliation loop.
   222  	res, err := r.reconcile(ctx, cluster, m)
    223  	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
    224  	// the current cluster due to concurrent access.
   225  	if errors.Is(err, remote.ErrClusterLocked) {
   226  		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   227  		return ctrl.Result{Requeue: true}, nil
   228  	}
   229  	return res, err
   230  }
   231  
   232  func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
   233  	// Always update the readyCondition by summarizing the state of other conditions.
    234  	// A step counter is added to represent progress during the provisioning process; it is hidden
    235  	// after provisioning - e.g. when an MHC condition exists - and during the deletion process.
   236  	conditions.SetSummary(machine,
   237  		conditions.WithConditions(
   238  			// Infrastructure problems should take precedence over all the other conditions
   239  			clusterv1.InfrastructureReadyCondition,
   240  			// Bootstrap comes after, but it is relevant only during initial machine provisioning.
   241  			clusterv1.BootstrapReadyCondition,
   242  			// MHC reported condition should take precedence over the remediation progress
   243  			clusterv1.MachineHealthCheckSucceededCondition,
   244  			clusterv1.MachineOwnerRemediatedCondition,
   245  		),
   246  		conditions.WithStepCounterIf(machine.ObjectMeta.DeletionTimestamp.IsZero() && machine.Spec.ProviderID == nil),
   247  		conditions.WithStepCounterIfOnly(
   248  			clusterv1.BootstrapReadyCondition,
   249  			clusterv1.InfrastructureReadyCondition,
   250  		),
   251  	)
   252  
   253  	// Patch the object, ignoring conflicts on the conditions owned by this controller.
   254  	// Also, if requested, we are adding additional options like e.g. Patch ObservedGeneration when issuing the
   255  	// patch at the end of the reconcile loop.
   256  	options = append(options,
   257  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   258  			clusterv1.ReadyCondition,
   259  			clusterv1.BootstrapReadyCondition,
   260  			clusterv1.InfrastructureReadyCondition,
   261  			clusterv1.DrainingSucceededCondition,
   262  			clusterv1.MachineHealthCheckSucceededCondition,
   263  			clusterv1.MachineOwnerRemediatedCondition,
   264  		}},
   265  	)
   266  
   267  	return patchHelper.Patch(ctx, machine, options...)
   268  }
   269  
   270  func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) {
   271  	// If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly
   272  	// owned by the Cluster (if not already present).
   273  	if r.shouldAdopt(m) {
   274  		m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{
   275  			APIVersion: clusterv1.GroupVersion.String(),
   276  			Kind:       "Cluster",
   277  			Name:       cluster.Name,
   278  			UID:        cluster.UID,
   279  		}))
   280  	}
   281  
   282  	phases := []func(context.Context, *scope) (ctrl.Result, error){
   283  		r.reconcileBootstrap,
   284  		r.reconcileInfrastructure,
   285  		r.reconcileNode,
   286  		r.reconcileCertificateExpiry,
   287  	}
   288  
   289  	res := ctrl.Result{}
   290  	errs := []error{}
   291  	s := &scope{
   292  		cluster: cluster,
   293  		machine: m,
   294  	}
   295  	for _, phase := range phases {
   296  		// Call the inner reconciliation methods.
   297  		phaseResult, err := phase(ctx, s)
   298  		if err != nil {
   299  			errs = append(errs, err)
   300  		}
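         		// Once any phase has failed, keep running the remaining phases but stop aggregating their results.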
   301  		if len(errs) > 0 {
   302  			continue
   303  		}
   304  		res = util.LowestNonZeroResult(res, phaseResult)
   305  	}
   306  	return res, kerrors.NewAggregate(errs)
   307  }
   308  
   309  // scope holds the different objects that are read and used during the reconcile.
   310  type scope struct {
   311  	// cluster is the Cluster object the Machine belongs to.
   312  	// It is set at the beginning of the reconcile function.
   313  	cluster *clusterv1.Cluster
   314  
   315  	// machine is the Machine object. It is set at the beginning
   316  	// of the reconcile function.
   317  	machine *clusterv1.Machine
   318  
   319  	// infraMachine is the Infrastructure Machine object that is referenced by the
   320  	// Machine. It is set after reconcileInfrastructure is called.
   321  	infraMachine *unstructured.Unstructured
   322  
   323  	// bootstrapConfig is the BootstrapConfig object that is referenced by the
   324  	// Machine. It is set after reconcileBootstrap is called.
   325  	bootstrapConfig *unstructured.Unstructured
   326  }
   327  
   328  func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo
   329  	log := ctrl.LoggerFrom(ctx)
   330  
   331  	err := r.isDeleteNodeAllowed(ctx, cluster, m)
   332  	isDeleteNodeAllowed := err == nil
   333  	if err != nil {
   334  		switch err {
   335  		case errNoControlPlaneNodes, errLastControlPlaneNode, errNilNodeRef, errClusterIsBeingDeleted, errControlPlaneIsBeingDeleted:
   336  			nodeName := ""
   337  			if m.Status.NodeRef != nil {
   338  				nodeName = m.Status.NodeRef.Name
   339  			}
   340  			log.Info("Deleting Kubernetes Node associated with Machine is not allowed", "Node", klog.KRef("", nodeName), "cause", err.Error())
   341  		default:
   342  			return ctrl.Result{}, errors.Wrapf(err, "failed to check if Kubernetes Node deletion is allowed")
   343  		}
   344  	}
   345  
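         	// When Node deletion is allowed, run the deletion workflow for the Node: pre-drain hook, drain, then wait for volume detach.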
   346  	if isDeleteNodeAllowed {
   347  		// pre-drain.delete lifecycle hook
    348  		// Return early without error; we will requeue if/when the hook owner removes the annotation.
   349  		if annotations.HasWithPrefix(clusterv1.PreDrainDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
   350  			conditions.MarkFalse(m, clusterv1.PreDrainDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
   351  			return ctrl.Result{}, nil
   352  		}
   353  		conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)
   354  
   355  		// Drain node before deletion and issue a patch in order to make this operation visible to the users.
   356  		if r.isNodeDrainAllowed(m) {
   357  			patchHelper, err := patch.NewHelper(m, r.Client)
   358  			if err != nil {
   359  				return ctrl.Result{}, err
   360  			}
   361  
   362  			log.Info("Draining node", "Node", klog.KRef("", m.Status.NodeRef.Name))
   363  			// The DrainingSucceededCondition never exists before the node is drained for the first time,
    364  			// so its transition time can be used to record when draining first started.
    365  			// This `if` condition prevents the transition time from being changed more than once.
   366  			if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
   367  				conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
   368  			}
   369  
   370  			if err := patchMachine(ctx, patchHelper, m); err != nil {
   371  				return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
   372  			}
   373  
   374  			if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
   375  				if err != nil {
   376  					conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
   377  					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
   378  				}
   379  				return result, err
   380  			}
   381  
   382  			conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
   383  			r.recorder.Eventf(m, corev1.EventTypeNormal, "SuccessfulDrainNode", "success draining Machine's node %q", m.Status.NodeRef.Name)
   384  		}
   385  
   386  		// After node draining is completed, and if isNodeVolumeDetachingAllowed returns True, make sure all
   387  		// volumes are detached before proceeding to delete the Node.
   388  		if r.isNodeVolumeDetachingAllowed(m) {
   389  			// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
    390  			// so its transition time can be used to record when we first started waiting for volume detachment.
    391  			// This `if` condition prevents the transition time from being changed more than once.
   392  			if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
   393  				conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
   394  			}
   395  
   396  			if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
   397  				if err != nil {
   398  					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
   399  					return ctrl.Result{}, err
   400  				}
   401  				log.Info("Waiting for node volumes to be detached", "Node", klog.KRef("", m.Status.NodeRef.Name))
   402  				return ctrl.Result{}, nil
   403  			}
   404  			conditions.MarkTrue(m, clusterv1.VolumeDetachSucceededCondition)
   405  			r.recorder.Eventf(m, corev1.EventTypeNormal, "NodeVolumesDetached", "success waiting for node volumes detaching Machine's node %q", m.Status.NodeRef.Name)
   406  		}
   407  	}
   408  
   409  	// pre-term.delete lifecycle hook
    410  	// Return early without error; we will requeue if/when the hook owner removes the annotation.
   411  	if annotations.HasWithPrefix(clusterv1.PreTerminateDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
   412  		conditions.MarkFalse(m, clusterv1.PreTerminateDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
   413  		return ctrl.Result{}, nil
   414  	}
   415  	conditions.MarkTrue(m, clusterv1.PreTerminateDeleteHookSucceededCondition)
   416  
    417  	// Return early and don't remove the finalizer if we got an error or
    418  	// if the deletion of the external (bootstrap/infrastructure) objects isn't complete yet.
   419  
   420  	patchHelper, err := patch.NewHelper(m, r.Client)
   421  	if err != nil {
   422  		return ctrl.Result{}, err
   423  	}
   424  	conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
   425  	if err := patchMachine(ctx, patchHelper, m); err != nil {
   426  		conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "")
   427  		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
   428  	}
   429  
   430  	infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m)
   431  	if err != nil {
   432  		return ctrl.Result{}, err
   433  	}
   434  	if !infrastructureDeleted {
   435  		log.Info("Waiting for infrastructure to be deleted", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name))
   436  		return ctrl.Result{}, nil
   437  	}
   438  
   439  	bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m)
   440  	if err != nil {
   441  		return ctrl.Result{}, err
   442  	}
   443  	if !bootstrapDeleted {
   444  		log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name))
   445  		return ctrl.Result{}, nil
   446  	}
   447  
   448  	// We only delete the node after the underlying infrastructure is gone.
   449  	// https://github.com/kubernetes-sigs/cluster-api/issues/2565
   450  	if isDeleteNodeAllowed {
   451  		log.Info("Deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
   452  
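         		// Retry Node deletion every 2 seconds until nodeDeletionRetryTimeout; a NotFound error is treated as success.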
   453  		var deleteNodeErr error
   454  		waitErr := wait.PollUntilContextTimeout(ctx, 2*time.Second, r.nodeDeletionRetryTimeout, true, func(ctx context.Context) (bool, error) {
   455  			if deleteNodeErr = r.deleteNode(ctx, cluster, m.Status.NodeRef.Name); deleteNodeErr != nil && !apierrors.IsNotFound(errors.Cause(deleteNodeErr)) {
   456  				return false, nil
   457  			}
   458  			return true, nil
   459  		})
   460  		if waitErr != nil {
   461  			log.Error(deleteNodeErr, "Timed out deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
   462  			conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "")
   463  			r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)
   464  
    465  			// If the node deletion timeout has not expired yet, requeue the Machine for reconciliation.
   466  			if m.Spec.NodeDeletionTimeout == nil || m.Spec.NodeDeletionTimeout.Nanoseconds() == 0 || m.DeletionTimestamp.Add(m.Spec.NodeDeletionTimeout.Duration).After(time.Now()) {
   467  				return ctrl.Result{}, deleteNodeErr
   468  			}
   469  			log.Info("Node deletion timeout expired, continuing without Node deletion.")
   470  		}
   471  	}
   472  
   473  	controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer)
   474  	return ctrl.Result{}, nil
   475  }
   476  
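         // isNodeDrainAllowed returns false if either the ExcludeNodeDrainingAnnotation annotation is set OR
         // the node drain timeout is exceeded, otherwise returns true.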
   477  func (r *Reconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
   478  	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
   479  		return false
   480  	}
   481  
   482  	if r.nodeDrainTimeoutExceeded(m) {
   483  		return false
   484  	}
   485  
   486  	return true
   487  }
   488  
   489  // isNodeVolumeDetachingAllowed returns False if either ExcludeWaitForNodeVolumeDetachAnnotation annotation is set OR
   490  // nodeVolumeDetachTimeoutExceeded timeout is exceeded, otherwise returns True.
   491  func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
   492  	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeWaitForNodeVolumeDetachAnnotation]; exists {
   493  		return false
   494  	}
   495  
   496  	if r.nodeVolumeDetachTimeoutExceeded(m) {
   497  		return false
   498  	}
   499  
   500  	return true
   501  }
   502  
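         // nodeDrainTimeoutExceeded returns false if NodeDrainTimeout is nil or <= 0, or if DrainingSucceededCondition is
         // not set on the Machine. Otherwise it returns true if NodeDrainTimeout has expired since the last transition time
         // of DrainingSucceededCondition.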
   503  func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
    504  	// if the NodeDrainTimeout is not set by the user
   505  	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
   506  		return false
   507  	}
   508  
   509  	// if the draining succeeded condition does not exist
   510  	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
   511  		return false
   512  	}
   513  
   514  	now := time.Now()
   515  	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
   516  	diff := now.Sub(firstTimeDrain.Time)
   517  	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
   518  }
   519  
    520  // nodeVolumeDetachTimeoutExceeded returns false if either NodeVolumeDetachTimeout is nil or <= 0, OR
    521  // VolumeDetachSucceededCondition is not set on the Machine. Otherwise it returns true if the timeout has expired
    522  // since the last transition time of VolumeDetachSucceededCondition.
   523  func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
    524  	// if the NodeVolumeDetachTimeout is not set by the user
   525  	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
   526  		return false
   527  	}
   528  
   529  	// if the volume detaching succeeded condition does not exist
   530  	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
   531  		return false
   532  	}
   533  
   534  	now := time.Now()
   535  	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
   536  	diff := now.Sub(firstTimeDetach.Time)
   537  	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
   538  }
   539  
    540  // isDeleteNodeAllowed returns nil only if the Machine's NodeRef is set and it is safe to delete the corresponding Node,
    541  // e.g. the Cluster and its control plane are not being deleted and the Machine is not the last control plane member.
   542  func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
   543  	log := ctrl.LoggerFrom(ctx)
   544  	// Return early if the cluster is being deleted.
   545  	if !cluster.DeletionTimestamp.IsZero() {
   546  		return errClusterIsBeingDeleted
   547  	}
   548  
   549  	// Cannot delete something that doesn't exist.
   550  	if machine.Status.NodeRef == nil {
   551  		return errNilNodeRef
   552  	}
   553  
   554  	// controlPlaneRef is an optional field in the Cluster so skip the external
   555  	// managed control plane check if it is nil
   556  	if cluster.Spec.ControlPlaneRef != nil {
   557  		controlPlane, err := external.Get(ctx, r.Client, cluster.Spec.ControlPlaneRef, cluster.Spec.ControlPlaneRef.Namespace)
   558  		if apierrors.IsNotFound(err) {
   559  			// If control plane object in the reference does not exist, log and skip check for
   560  			// external managed control plane
   561  			log.Error(err, "control plane object specified in cluster spec.controlPlaneRef does not exist", "kind", cluster.Spec.ControlPlaneRef.Kind, "name", cluster.Spec.ControlPlaneRef.Name)
   562  		} else {
   563  			if err != nil {
   564  				// If any other error occurs when trying to get the control plane object,
   565  				// return the error so we can retry
   566  				return err
   567  			}
   568  
   569  			// Return early if the object referenced by controlPlaneRef is being deleted.
   570  			if !controlPlane.GetDeletionTimestamp().IsZero() {
   571  				return errControlPlaneIsBeingDeleted
   572  			}
   573  
   574  			// Check if the ControlPlane is externally managed (AKS, EKS, GKE, etc)
   575  			// and skip the following section if control plane is externally managed
   576  			// because there will be no control plane nodes registered
   577  			if util.IsExternalManagedControlPlane(controlPlane) {
   578  				return nil
   579  			}
   580  		}
   581  	}
   582  
   583  	// Get all of the active machines that belong to this cluster.
   584  	machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
   585  	if err != nil {
   586  		return err
   587  	}
   588  
   589  	// Whether or not it is okay to delete the NodeRef depends on the
   590  	// number of remaining control plane members and whether or not this
   591  	// machine is one of them.
   592  	numControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
   593  	if numControlPlaneMachines == 0 {
   594  		// Do not delete the NodeRef if there are no remaining members of
   595  		// the control plane.
   596  		return errNoControlPlaneNodes
   597  	}
   598  	// Otherwise it is okay to delete the NodeRef.
   599  	return nil
   600  }
   601  
   602  func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
   603  	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))
   604  
   605  	restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster))
   606  	if err != nil {
   607  		if errors.Is(err, remote.ErrClusterLocked) {
   608  			log.V(5).Info("Requeuing drain Node because another worker has the lock on the ClusterCacheTracker")
   609  			return ctrl.Result{Requeue: true}, nil
   610  		}
   611  		log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry")
   612  		return ctrl.Result{}, nil
   613  	}
   614  	restConfig = rest.CopyConfig(restConfig)
   615  	restConfig.Timeout = r.NodeDrainClientTimeout
   616  	kubeClient, err := kubernetes.NewForConfig(restConfig)
   617  	if err != nil {
   618  		log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
   619  		return ctrl.Result{}, nil
   620  	}
   621  
   622  	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
   623  	if err != nil {
   624  		if apierrors.IsNotFound(err) {
   625  			// If an admin deletes the node directly, we'll end up here.
   626  			log.Error(err, "Could not find node from noderef, it may have already been deleted")
   627  			return ctrl.Result{}, nil
   628  		}
   629  		return ctrl.Result{}, errors.Wrapf(err, "unable to get node %v", nodeName)
   630  	}
   631  
   632  	drainer := &kubedrain.Helper{
   633  		Client:              kubeClient,
   634  		Ctx:                 ctx,
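         		// Force allows deleting pods that are not managed by a controller, DaemonSet-managed pods are ignored, and data in
         		// emptyDir volumes is deleted; GracePeriodSeconds -1 means each pod's own termination grace period is used.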
   635  		Force:               true,
   636  		IgnoreAllDaemonSets: true,
   637  		DeleteEmptyDirData:  true,
   638  		GracePeriodSeconds:  -1,
   639  		// If a pod is not evicted in 20 seconds, retry the eviction next time the
   640  		// machine gets reconciled again (to allow other machines to be reconciled).
   641  		Timeout: 20 * time.Second,
   642  		OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
   643  			verbStr := "Deleted"
   644  			if usingEviction {
   645  				verbStr = "Evicted"
   646  			}
   647  			log.Info(fmt.Sprintf("%s pod from Node", verbStr),
   648  				"Pod", klog.KObj(pod))
   649  		},
   650  		Out: writer{log.Info},
   651  		ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
   652  			log.Error(nil, msg, keysAndValues...)
   653  		}},
   654  	}
   655  
   656  	if noderefutil.IsNodeUnreachable(node) {
   657  		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
   658  		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
   659  	}
   660  
   661  	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {
   662  		// Machine will be re-reconciled after a cordon failure.
   663  		log.Error(err, "Cordon failed")
   664  		return ctrl.Result{}, errors.Wrapf(err, "unable to cordon node %v", node.Name)
   665  	}
   666  
   667  	if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil {
   668  		// Machine will be re-reconciled after a drain failure.
   669  		log.Error(err, "Drain failed, retry in 20s")
   670  		return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
   671  	}
   672  
   673  	log.Info("Drain successful")
   674  	return ctrl.Result{}, nil
   675  }
   676  
    677  // shouldWaitForNodeVolumes returns true if the node status still has volumes attached.
    678  // Pod deletion and volume detach happen asynchronously, so a pod could be deleted before its volumes are detached from the node.
    679  // This can cause issues for some storage provisioners; for example, with vsphere-volume it is problematic
    680  // because if the node is deleted before the detach succeeds, the underlying VMDK is deleted together with the Machine.
    681  // Therefore, after node draining we need to check that all volumes are detached before deleting the node.
   682  func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (bool, error) {
   683  	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))
   684  
   685  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   686  	if err != nil {
   687  		return true, err
   688  	}
   689  
   690  	node := &corev1.Node{}
   691  	if err := remoteClient.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
   692  		if apierrors.IsNotFound(err) {
   693  			log.Error(err, "Could not find node from noderef, it may have already been deleted")
   694  			return false, nil
   695  		}
   696  		return true, err
   697  	}
   698  
   699  	return len(node.Status.VolumesAttached) != 0, nil
   700  }
   701  
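         // deleteNode deletes the Node with the given name from the workload cluster referenced by the ClusterCacheTracker.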
   702  func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
   703  	log := ctrl.LoggerFrom(ctx)
   704  
   705  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   706  	if err != nil {
   707  		if errors.Is(err, remote.ErrClusterLocked) {
   708  			return errors.Wrapf(err, "failed deleting Node because another worker has the lock on the ClusterCacheTracker")
   709  		}
   710  		log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry")
   711  		return nil
   712  	}
   713  
   714  	node := &corev1.Node{
   715  		ObjectMeta: metav1.ObjectMeta{
   716  			Name: name,
   717  		},
   718  	}
   719  
   720  	if err := remoteClient.Delete(ctx, node); err != nil {
   721  		return errors.Wrapf(err, "error deleting node %s", name)
   722  	}
   723  	return nil
   724  }
   725  
   726  func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
   727  	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
   728  	if err != nil {
   729  		return false, err
   730  	}
   731  
   732  	if obj == nil {
   733  		// Marks the bootstrap as deleted
   734  		conditions.MarkFalse(m, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
   735  		return true, nil
   736  	}
   737  
   738  	// Report a summary of current status of the bootstrap object defined for this machine.
   739  	conditions.SetMirror(m, clusterv1.BootstrapReadyCondition,
   740  		conditions.UnstructuredGetter(obj),
   741  		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
   742  	)
   743  	return false, nil
   744  }
   745  
   746  func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
   747  	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
   748  	if err != nil {
   749  		return false, err
   750  	}
   751  
   752  	if obj == nil {
   753  		// Marks the infrastructure as deleted
   754  		conditions.MarkFalse(m, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
   755  		return true, nil
   756  	}
   757  
    758  	// Report a summary of current status of the infrastructure object defined for this machine.
   759  	conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition,
   760  		conditions.UnstructuredGetter(obj),
   761  		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
   762  	)
   763  	return false, nil
   764  }
   765  
   766  // reconcileDeleteExternal tries to delete external references.
   767  func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
   768  	if ref == nil {
   769  		return nil, nil
   770  	}
   771  
   772  	// get the external object
   773  	obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
   774  	if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
   775  		return nil, errors.Wrapf(err, "failed to get %s %q for Machine %q in namespace %q",
   776  			ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
   777  	}
   778  
   779  	if obj != nil {
   780  		// reconcileExternal ensures that we set the object's OwnerReferences correctly and watch the object.
   781  		// The machine delete logic depends on reconciling the machine when the external objects are deleted.
   782  		// This avoids a race condition where the machine is deleted before the external objects are ever reconciled
   783  		// by this controller.
   784  		if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil {
   785  			return nil, err
   786  		}
   787  
   788  		// Issue a delete request.
   789  		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
   790  			return obj, errors.Wrapf(err,
   791  				"failed to delete %v %q for Machine %q in namespace %q",
   792  				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
   793  		}
   794  	}
   795  
    796  	// Return the external object; a nil return value tells the caller that the object no longer exists.
   797  	return obj, nil
   798  }
   799  
   800  // shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster.
   801  func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
   802  	// if the machine is controlled by something (MS or KCP), or if it is a stand-alone machine directly owned by the Cluster, then no-op.
   803  	if metav1.GetControllerOf(m) != nil || util.HasOwner(m.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Cluster"}) {
   804  		return false
   805  	}
   806  
   807  	// Note: following checks are required because after restore from a backup both the Machine controller and the
   808  	// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
   809  
   810  	// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
   811  	if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
   812  		return false
   813  	}
   814  
   815  	// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
   816  	if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
   817  		return false
   818  	}
   819  
   820  	// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
   821  	if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
   822  		return false
   823  	}
   824  	return true
   825  }
   826  
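         // watchClusterNodes starts a watch on the workload cluster's Nodes (once the control plane is initialized) so that
         // Node events are mapped back to the owning Machine via nodeToMachine.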
   827  func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
   828  	log := ctrl.LoggerFrom(ctx)
   829  
   830  	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
   831  		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
   832  		return nil
   833  	}
   834  
   835  	// If there is no tracker, don't watch remote nodes
   836  	if r.Tracker == nil {
   837  		return nil
   838  	}
   839  
   840  	return r.Tracker.Watch(ctx, remote.WatchInput{
   841  		Name:         "machine-watchNodes",
   842  		Cluster:      util.ObjectKey(cluster),
   843  		Watcher:      r.controller,
   844  		Kind:         &corev1.Node{},
   845  		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachine),
   846  	})
   847  }
   848  
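         // nodeToMachine maps a Node event to the Machine owning that Node, matching first by status.nodeRef.name and,
         // if no match is found, by spec.providerID.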
   849  func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []reconcile.Request {
   850  	node, ok := o.(*corev1.Node)
   851  	if !ok {
   852  		panic(fmt.Sprintf("Expected a Node but got a %T", o))
   853  	}
   854  
   855  	var filters []client.ListOption
   856  	// Match by clusterName when the node has the annotation.
   857  	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
   858  		filters = append(filters, client.MatchingLabels{
   859  			clusterv1.ClusterNameLabel: clusterName,
   860  		})
   861  	}
   862  
   863  	// Match by namespace when the node has the annotation.
   864  	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
   865  		filters = append(filters, client.InNamespace(namespace))
   866  	}
   867  
   868  	// Match by nodeName and status.nodeRef.name.
   869  	machineList := &clusterv1.MachineList{}
   870  	if err := r.Client.List(
   871  		ctx,
   872  		machineList,
   873  		append(filters, client.MatchingFields{index.MachineNodeNameField: node.Name})...); err != nil {
   874  		return nil
   875  	}
   876  
   877  	// There should be exactly 1 Machine for the node.
   878  	if len(machineList.Items) == 1 {
   879  		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
   880  	}
   881  
    882  	// Otherwise let's match by providerID. This is useful when e.g. the NodeRef has not been set yet.
   883  	// Match by providerID
   884  	if node.Spec.ProviderID == "" {
   885  		return nil
   886  	}
   887  	machineList = &clusterv1.MachineList{}
   888  	if err := r.Client.List(
   889  		ctx,
   890  		machineList,
   891  		append(filters, client.MatchingFields{index.MachineProviderIDField: node.Spec.ProviderID})...); err != nil {
   892  		return nil
   893  	}
   894  
   895  	// There should be exactly 1 Machine for the node.
   896  	if len(machineList.Items) == 1 {
   897  		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
   898  	}
   899  
   900  	return nil
   901  }
   902  
    903  // writer implements the io.Writer interface as a pass-through for klog.
   904  type writer struct {
   905  	logFunc func(msg string, keysAndValues ...interface{})
   906  }
   907  
   908  // Write passes string(p) into writer's logFunc and always returns len(p).
   909  func (w writer) Write(p []byte) (n int, err error) {
   910  	w.logFunc(string(p))
   911  	return len(p), nil
   912  }