sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machine/machine_controller.go

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package machine
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	"github.com/pkg/errors"
    25  	corev1 "k8s.io/api/core/v1"
    26  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	"k8s.io/client-go/kubernetes"
    33  	"k8s.io/client-go/rest"
    34  	"k8s.io/client-go/tools/record"
    35  	"k8s.io/klog/v2"
    36  	kubedrain "k8s.io/kubectl/pkg/drain"
    37  	ctrl "sigs.k8s.io/controller-runtime"
    38  	"sigs.k8s.io/controller-runtime/pkg/builder"
    39  	"sigs.k8s.io/controller-runtime/pkg/client"
    40  	"sigs.k8s.io/controller-runtime/pkg/controller"
    41  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    42  	"sigs.k8s.io/controller-runtime/pkg/handler"
    43  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    44  
    45  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    46  	"sigs.k8s.io/cluster-api/api/v1beta1/index"
    47  	"sigs.k8s.io/cluster-api/controllers/external"
    48  	"sigs.k8s.io/cluster-api/controllers/noderefutil"
    49  	"sigs.k8s.io/cluster-api/controllers/remote"
    50  	"sigs.k8s.io/cluster-api/internal/util/ssa"
    51  	"sigs.k8s.io/cluster-api/util"
    52  	"sigs.k8s.io/cluster-api/util/annotations"
    53  	"sigs.k8s.io/cluster-api/util/collections"
    54  	"sigs.k8s.io/cluster-api/util/conditions"
    55  	clog "sigs.k8s.io/cluster-api/util/log"
    56  	"sigs.k8s.io/cluster-api/util/patch"
    57  	"sigs.k8s.io/cluster-api/util/predicates"
    58  )
    59  
    60  var (
    61  	errNilNodeRef                 = errors.New("noderef is nil")
    62  	errLastControlPlaneNode       = errors.New("last control plane member")
    63  	errNoControlPlaneNodes        = errors.New("no control plane members")
    64  	errClusterIsBeingDeleted      = errors.New("cluster is being deleted")
    65  	errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
    66  )
    67  
    68  // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
    69  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
    70  // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
    71  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
    72  // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status;machines/finalizers,verbs=get;list;watch;create;update;patch;delete
    73  // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
    74  
    75  // Reconciler reconciles a Machine object.
    76  type Reconciler struct {
    77  	Client                    client.Client
    78  	UnstructuredCachingClient client.Client
    79  	APIReader                 client.Reader
    80  	Tracker                   *remote.ClusterCacheTracker
    81  
    82  	// WatchFilterValue is the label value used to filter events prior to reconciliation.
    83  	WatchFilterValue string
    84  
     85  	// NodeDrainClientTimeout is the timeout of the client used for draining nodes.
    86  	NodeDrainClientTimeout time.Duration
    87  
    88  	controller      controller.Controller
    89  	recorder        record.EventRecorder
    90  	externalTracker external.ObjectTracker
    91  
    92  	// nodeDeletionRetryTimeout determines how long the controller will retry deleting a node
    93  	// during a single reconciliation.
    94  	nodeDeletionRetryTimeout time.Duration
    95  	ssaCache                 ssa.Cache
    96  }
    97  
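         // SetupWithManager sets up the Machine controller with the Manager: it builds a controller for Machines and
         // adds watches on Clusters, MachineSets, and MachineDeployments that map back to Machines, then initializes
         // the event recorder, the external object tracker, and the SSA cache.
         //
         // A minimal wiring sketch (illustrative only; the manager mgr, the ClusterCacheTracker tracker, and the
         // controller options opts are assumed to exist elsewhere):
         //
         //	r := &Reconciler{
         //		Client:                    mgr.GetClient(),
         //		UnstructuredCachingClient: mgr.GetClient(),
         //		APIReader:                 mgr.GetAPIReader(),
         //		Tracker:                   tracker,
         //	}
         //	if err := r.SetupWithManager(ctx, mgr, opts); err != nil {
         //		return err
         //	}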
    98  func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
    99  	clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
   100  	if err != nil {
   101  		return err
   102  	}
   103  	msToMachines, err := util.MachineSetToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
   104  	if err != nil {
   105  		return err
   106  	}
   107  	mdToMachines, err := util.MachineDeploymentToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
   108  	if err != nil {
   109  		return err
   110  	}
   111  
   112  	if r.nodeDeletionRetryTimeout.Nanoseconds() == 0 {
   113  		r.nodeDeletionRetryTimeout = 10 * time.Second
   114  	}
   115  
   116  	c, err := ctrl.NewControllerManagedBy(mgr).
   117  		For(&clusterv1.Machine{}).
   118  		WithOptions(options).
   119  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
   120  		Watches(
   121  			&clusterv1.Cluster{},
   122  			handler.EnqueueRequestsFromMapFunc(clusterToMachines),
   123  			builder.WithPredicates(
   124  				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
   125  				predicates.All(ctrl.LoggerFrom(ctx),
   126  					predicates.Any(ctrl.LoggerFrom(ctx),
   127  						predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
   128  						predicates.ClusterControlPlaneInitialized(ctrl.LoggerFrom(ctx)),
   129  					),
   130  					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
   131  				),
   132  			)).
   133  		Watches(
   134  			&clusterv1.MachineSet{},
   135  			handler.EnqueueRequestsFromMapFunc(msToMachines),
   136  		).
   137  		Watches(
   138  			&clusterv1.MachineDeployment{},
   139  			handler.EnqueueRequestsFromMapFunc(mdToMachines),
   140  		).
   141  		Build(r)
   142  	if err != nil {
   143  		return errors.Wrap(err, "failed setting up with a controller manager")
   144  	}
   145  
   146  	r.controller = c
   147  	r.recorder = mgr.GetEventRecorderFor("machine-controller")
   148  	r.externalTracker = external.ObjectTracker{
   149  		Controller: c,
   150  		Cache:      mgr.GetCache(),
   151  	}
   152  	r.ssaCache = ssa.NewCache()
   153  	return nil
   154  }
   155  
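         // Reconcile fetches the Machine and its owning Cluster, ensures the cluster-name label and the finalizer are set,
         // patches the Machine (including conditions and phase) on exit, and dispatches to the deletion flow when the
         // Machine has a deletionTimestamp or to the normal reconciliation flow otherwise.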
   156  func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
   157  	// Fetch the Machine instance
   158  	m := &clusterv1.Machine{}
   159  	if err := r.Client.Get(ctx, req.NamespacedName, m); err != nil {
   160  		if apierrors.IsNotFound(err) {
   161  			// Object not found, return.  Created objects are automatically garbage collected.
   162  			// For additional cleanup logic use finalizers.
   163  			return ctrl.Result{}, nil
   164  		}
   165  
   166  		// Error reading the object - requeue the request.
   167  		return ctrl.Result{}, err
   168  	}
   169  
   170  	// AddOwners adds the owners of Machine as k/v pairs to the logger.
   171  	// Specifically, it will add KubeadmControlPlane, MachineSet and MachineDeployment.
   172  	ctx, log, err := clog.AddOwners(ctx, r.Client, m)
   173  	if err != nil {
   174  		return ctrl.Result{}, err
   175  	}
   176  
   177  	log = log.WithValues("Cluster", klog.KRef(m.ObjectMeta.Namespace, m.Spec.ClusterName))
   178  	ctx = ctrl.LoggerInto(ctx, log)
   179  
   180  	cluster, err := util.GetClusterByName(ctx, r.Client, m.ObjectMeta.Namespace, m.Spec.ClusterName)
   181  	if err != nil {
   182  		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machine %q in namespace %q",
   183  			m.Spec.ClusterName, m.Name, m.Namespace)
   184  	}
   185  
   186  	// Return early if the object or Cluster is paused.
   187  	if annotations.IsPaused(cluster, m) {
   188  		log.Info("Reconciliation is paused for this object")
   189  		return ctrl.Result{}, nil
   190  	}
   191  
   192  	// Initialize the patch helper
   193  	patchHelper, err := patch.NewHelper(m, r.Client)
   194  	if err != nil {
   195  		return ctrl.Result{}, err
   196  	}
   197  
   198  	defer func() {
   199  		r.reconcilePhase(ctx, m)
   200  
   201  		// Always attempt to patch the object and status after each reconciliation.
    202  		// Patch ObservedGeneration only if the reconciliation completed successfully.
   203  		patchOpts := []patch.Option{}
   204  		if reterr == nil {
   205  			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
   206  		}
   207  		if err := patchMachine(ctx, patchHelper, m, patchOpts...); err != nil {
   208  			reterr = kerrors.NewAggregate([]error{reterr, err})
   209  		}
   210  	}()
   211  
   212  	// Reconcile labels.
   213  	if m.Labels == nil {
   214  		m.Labels = make(map[string]string)
   215  	}
   216  	m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName
   217  
   218  	// Handle deletion reconciliation loop.
   219  	if !m.ObjectMeta.DeletionTimestamp.IsZero() {
   220  		res, err := r.reconcileDelete(ctx, cluster, m)
    221  		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
    222  		// the current cluster due to concurrent access.
   223  		if errors.Is(err, remote.ErrClusterLocked) {
   224  			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   225  			return ctrl.Result{RequeueAfter: time.Minute}, nil
   226  		}
   227  		return res, err
   228  	}
   229  
   230  	// Add finalizer first if not set to avoid the race condition between init and delete.
   231  	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
   232  	if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) {
   233  		controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer)
   234  		return ctrl.Result{}, nil
   235  	}
   236  
   237  	// Handle normal reconciliation loop.
   238  	res, err := r.reconcile(ctx, cluster, m)
    239  	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
    240  	// the current cluster due to concurrent access.
   241  	if errors.Is(err, remote.ErrClusterLocked) {
   242  		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
   243  		return ctrl.Result{RequeueAfter: time.Minute}, nil
   244  	}
   245  	return res, err
   246  }
   247  
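         // patchMachine summarizes the Machine's conditions into the Ready condition and patches the Machine,
         // declaring the conditions owned by this controller so that conflicts on them are ignored.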
   248  func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
   249  	// Always update the readyCondition by summarizing the state of other conditions.
    250  	// A step counter is added to represent progress during the provisioning process; the counter is hidden
    251  	// after provisioning is complete (e.g. when an MHC condition exists) and during the deletion process.
   252  	conditions.SetSummary(machine,
   253  		conditions.WithConditions(
   254  			// Infrastructure problems should take precedence over all the other conditions
   255  			clusterv1.InfrastructureReadyCondition,
   256  			// Bootstrap comes after, but it is relevant only during initial machine provisioning.
   257  			clusterv1.BootstrapReadyCondition,
   258  			// MHC reported condition should take precedence over the remediation progress
   259  			clusterv1.MachineHealthCheckSucceededCondition,
   260  			clusterv1.MachineOwnerRemediatedCondition,
   261  			clusterv1.DrainingSucceededCondition,
   262  		),
   263  		conditions.WithStepCounterIf(machine.ObjectMeta.DeletionTimestamp.IsZero() && machine.Spec.ProviderID == nil),
   264  		conditions.WithStepCounterIfOnly(
   265  			clusterv1.BootstrapReadyCondition,
   266  			clusterv1.InfrastructureReadyCondition,
   267  		),
   268  	)
   269  
   270  	// Patch the object, ignoring conflicts on the conditions owned by this controller.
   271  	// Also, if requested, we are adding additional options like e.g. Patch ObservedGeneration when issuing the
   272  	// patch at the end of the reconcile loop.
   273  	options = append(options,
   274  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   275  			clusterv1.ReadyCondition,
   276  			clusterv1.BootstrapReadyCondition,
   277  			clusterv1.InfrastructureReadyCondition,
   278  			clusterv1.DrainingSucceededCondition,
   279  			clusterv1.MachineHealthCheckSucceededCondition,
   280  			clusterv1.MachineOwnerRemediatedCondition,
   281  		}},
   282  	)
   283  
   284  	return patchHelper.Patch(ctx, machine, options...)
   285  }
   286  
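         // reconcile runs the normal (non-deletion) reconciliation: it ensures a stand-alone Machine is owned by the
         // Cluster, then executes the bootstrap, infrastructure, node, and certificate-expiry phases, aggregating their
         // results and errors.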
   287  func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) {
    288  	// If the Machine is a stand-alone one, i.e. it did not originate from a MachineDeployment, MachineSet,
    289  	// MachinePool, or control plane, then set it as directly owned by the Cluster (if not already present).
   290  	if r.shouldAdopt(m) {
   291  		m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{
   292  			APIVersion: clusterv1.GroupVersion.String(),
   293  			Kind:       "Cluster",
   294  			Name:       cluster.Name,
   295  			UID:        cluster.UID,
   296  		}))
   297  	}
   298  
   299  	phases := []func(context.Context, *scope) (ctrl.Result, error){
   300  		r.reconcileBootstrap,
   301  		r.reconcileInfrastructure,
   302  		r.reconcileNode,
   303  		r.reconcileCertificateExpiry,
   304  	}
   305  
   306  	res := ctrl.Result{}
   307  	errs := []error{}
   308  	s := &scope{
   309  		cluster: cluster,
   310  		machine: m,
   311  	}
   312  	for _, phase := range phases {
   313  		// Call the inner reconciliation methods.
   314  		phaseResult, err := phase(ctx, s)
   315  		if err != nil {
   316  			errs = append(errs, err)
   317  		}
   318  		if len(errs) > 0 {
   319  			continue
   320  		}
   321  		res = util.LowestNonZeroResult(res, phaseResult)
   322  	}
   323  	return res, kerrors.NewAggregate(errs)
   324  }
   325  
   326  // scope holds the different objects that are read and used during the reconcile.
   327  type scope struct {
   328  	// cluster is the Cluster object the Machine belongs to.
   329  	// It is set at the beginning of the reconcile function.
   330  	cluster *clusterv1.Cluster
   331  
   332  	// machine is the Machine object. It is set at the beginning
   333  	// of the reconcile function.
   334  	machine *clusterv1.Machine
   335  
   336  	// infraMachine is the Infrastructure Machine object that is referenced by the
   337  	// Machine. It is set after reconcileInfrastructure is called.
   338  	infraMachine *unstructured.Unstructured
   339  
   340  	// bootstrapConfig is the BootstrapConfig object that is referenced by the
   341  	// Machine. It is set after reconcileBootstrap is called.
   342  	bootstrapConfig *unstructured.Unstructured
   343  }
   344  
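         // reconcileDelete handles Machine deletion: it checks whether deleting the Node is allowed, honors the
         // pre-drain and pre-terminate lifecycle hooks, drains the node and waits for volumes to detach, deletes the
         // infrastructure and bootstrap objects, deletes the Node, and finally removes the Machine finalizer.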
   345  func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo
   346  	log := ctrl.LoggerFrom(ctx)
   347  
   348  	err := r.isDeleteNodeAllowed(ctx, cluster, m)
   349  	isDeleteNodeAllowed := err == nil
   350  	if err != nil {
   351  		switch err {
   352  		case errNoControlPlaneNodes, errLastControlPlaneNode, errNilNodeRef, errClusterIsBeingDeleted, errControlPlaneIsBeingDeleted:
   353  			nodeName := ""
   354  			if m.Status.NodeRef != nil {
   355  				nodeName = m.Status.NodeRef.Name
   356  			}
   357  			log.Info("Skipping deletion of Kubernetes Node associated with Machine as it is not allowed", "Node", klog.KRef("", nodeName), "cause", err.Error())
   358  		default:
   359  			return ctrl.Result{}, errors.Wrapf(err, "failed to check if Kubernetes Node deletion is allowed")
   360  		}
   361  	}
   362  
   363  	if isDeleteNodeAllowed {
   364  		// pre-drain.delete lifecycle hook
    365  		// Return early without error; the Machine will be requeued if/when the hook owner removes the annotation.
   366  		if annotations.HasWithPrefix(clusterv1.PreDrainDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
   367  			conditions.MarkFalse(m, clusterv1.PreDrainDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
   368  			return ctrl.Result{}, nil
   369  		}
   370  		conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)
   371  
   372  		// Drain node before deletion and issue a patch in order to make this operation visible to the users.
   373  		if r.isNodeDrainAllowed(m) {
   374  			patchHelper, err := patch.NewHelper(m, r.Client)
   375  			if err != nil {
   376  				return ctrl.Result{}, err
   377  			}
   378  
   379  			log.Info("Draining node", "Node", klog.KRef("", m.Status.NodeRef.Name))
    380  			// The DrainingSucceededCondition never exists before the node is drained for the first time,
    381  			// so its transition time can be used to record when draining first started.
    382  			// This `if` condition prevents the transition time from being changed more than once.
   383  			if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
   384  				conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
   385  			}
   386  
   387  			if err := patchMachine(ctx, patchHelper, m); err != nil {
   388  				return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
   389  			}
   390  
   391  			if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
   392  				if err != nil {
   393  					conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
   394  					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
   395  				}
   396  				return result, err
   397  			}
   398  
   399  			conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
   400  			r.recorder.Eventf(m, corev1.EventTypeNormal, "SuccessfulDrainNode", "success draining Machine's node %q", m.Status.NodeRef.Name)
   401  		}
   402  
   403  		// After node draining is completed, and if isNodeVolumeDetachingAllowed returns True, make sure all
   404  		// volumes are detached before proceeding to delete the Node.
   405  		if r.isNodeVolumeDetachingAllowed(m) {
    406  			// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
    407  			// so its transition time can be used to record when we first started waiting for volume detachment.
    408  			// This `if` condition prevents the transition time from being changed more than once.
   409  			if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
   410  				conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
   411  			}
   412  
   413  			if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
   414  				if err != nil {
   415  					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
   416  					return ctrl.Result{}, err
   417  				}
   418  				log.Info("Waiting for node volumes to be detached", "Node", klog.KRef("", m.Status.NodeRef.Name))
   419  				return ctrl.Result{}, nil
   420  			}
   421  			conditions.MarkTrue(m, clusterv1.VolumeDetachSucceededCondition)
   422  			r.recorder.Eventf(m, corev1.EventTypeNormal, "NodeVolumesDetached", "success waiting for node volumes detaching Machine's node %q", m.Status.NodeRef.Name)
   423  		}
   424  	}
   425  
   426  	// pre-term.delete lifecycle hook
    427  	// Return early without error; the Machine will be requeued if/when the hook owner removes the annotation.
   428  	if annotations.HasWithPrefix(clusterv1.PreTerminateDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
   429  		conditions.MarkFalse(m, clusterv1.PreTerminateDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
   430  		return ctrl.Result{}, nil
   431  	}
   432  	conditions.MarkTrue(m, clusterv1.PreTerminateDeleteHookSucceededCondition)
   433  
   434  	// Return early and don't remove the finalizer if we got an error or
   435  	// the external reconciliation deletion isn't ready.
   436  
   437  	patchHelper, err := patch.NewHelper(m, r.Client)
   438  	if err != nil {
   439  		return ctrl.Result{}, err
   440  	}
   441  	conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
   442  	if err := patchMachine(ctx, patchHelper, m); err != nil {
   443  		conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "")
   444  		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
   445  	}
   446  
   447  	infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m)
   448  	if err != nil {
   449  		return ctrl.Result{}, err
   450  	}
   451  	if !infrastructureDeleted {
   452  		log.Info("Waiting for infrastructure to be deleted", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name))
   453  		return ctrl.Result{}, nil
   454  	}
   455  
   456  	bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m)
   457  	if err != nil {
   458  		return ctrl.Result{}, err
   459  	}
   460  	if !bootstrapDeleted {
   461  		log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name))
   462  		return ctrl.Result{}, nil
   463  	}
   464  
   465  	// We only delete the node after the underlying infrastructure is gone.
   466  	// https://github.com/kubernetes-sigs/cluster-api/issues/2565
   467  	if isDeleteNodeAllowed {
   468  		log.Info("Deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
   469  
   470  		var deleteNodeErr error
   471  		waitErr := wait.PollUntilContextTimeout(ctx, 2*time.Second, r.nodeDeletionRetryTimeout, true, func(ctx context.Context) (bool, error) {
   472  			if deleteNodeErr = r.deleteNode(ctx, cluster, m.Status.NodeRef.Name); deleteNodeErr != nil && !apierrors.IsNotFound(errors.Cause(deleteNodeErr)) {
   473  				return false, nil
   474  			}
   475  			return true, nil
   476  		})
   477  		if waitErr != nil {
   478  			log.Error(deleteNodeErr, "Timed out deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
   479  			conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "")
   480  			r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)
   481  
    482  			// If the node deletion timeout has not expired yet, requeue the Machine for reconciliation.
   483  			if m.Spec.NodeDeletionTimeout == nil || m.Spec.NodeDeletionTimeout.Nanoseconds() == 0 || m.DeletionTimestamp.Add(m.Spec.NodeDeletionTimeout.Duration).After(time.Now()) {
   484  				return ctrl.Result{}, deleteNodeErr
   485  			}
   486  			log.Info("Node deletion timeout expired, continuing without Node deletion.")
   487  		}
   488  	}
   489  
   490  	controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer)
   491  	return ctrl.Result{}, nil
   492  }
   493  
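         // isNodeDrainAllowed returns false if the ExcludeNodeDrainingAnnotation annotation is set or the node drain
         // timeout has been exceeded; otherwise it returns true.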
   494  func (r *Reconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
   495  	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
   496  		return false
   497  	}
   498  
   499  	if r.nodeDrainTimeoutExceeded(m) {
   500  		return false
   501  	}
   502  
   503  	return true
   504  }
   505  
    506  // isNodeVolumeDetachingAllowed returns false if the ExcludeWaitForNodeVolumeDetachAnnotation annotation is set
    507  // or the node volume detach timeout has been exceeded; otherwise it returns true.
   508  func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
   509  	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeWaitForNodeVolumeDetachAnnotation]; exists {
   510  		return false
   511  	}
   512  
   513  	if r.nodeVolumeDetachTimeoutExceeded(m) {
   514  		return false
   515  	}
   516  
   517  	return true
   518  }
   519  
   520  func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
    521  	// if NodeDrainTimeout is not set by the user
   522  	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
   523  		return false
   524  	}
   525  
   526  	// if the draining succeeded condition does not exist
   527  	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
   528  		return false
   529  	}
   530  
   531  	now := time.Now()
   532  	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
   533  	diff := now.Sub(firstTimeDrain.Time)
   534  	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
   535  }
   536  
    537  // nodeVolumeDetachTimeoutExceeded returns false if NodeVolumeDetachTimeout is nil or <= 0, or if
    538  // VolumeDetachSucceededCondition is not set on the Machine. Otherwise it returns true if the timeout has expired
    539  // since the last transition time of VolumeDetachSucceededCondition.
   540  func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
    541  	// if NodeVolumeDetachTimeout is not set by the user
   542  	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
   543  		return false
   544  	}
   545  
   546  	// if the volume detaching succeeded condition does not exist
   547  	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
   548  		return false
   549  	}
   550  
   551  	now := time.Now()
   552  	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
   553  	diff := now.Sub(firstTimeDetach.Time)
   554  	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
   555  }
   556  
    557  // isDeleteNodeAllowed returns nil only if the Machine's NodeRef is set, neither the Cluster nor its control plane
    558  // is being deleted, and the cluster still has control plane members (or the control plane is externally managed).
   559  func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
   560  	log := ctrl.LoggerFrom(ctx)
   561  	// Return early if the cluster is being deleted.
   562  	if !cluster.DeletionTimestamp.IsZero() {
   563  		return errClusterIsBeingDeleted
   564  	}
   565  
   566  	// Cannot delete something that doesn't exist.
   567  	if machine.Status.NodeRef == nil {
   568  		return errNilNodeRef
   569  	}
   570  
   571  	// controlPlaneRef is an optional field in the Cluster so skip the external
   572  	// managed control plane check if it is nil
   573  	if cluster.Spec.ControlPlaneRef != nil {
   574  		controlPlane, err := external.Get(ctx, r.Client, cluster.Spec.ControlPlaneRef, cluster.Spec.ControlPlaneRef.Namespace)
   575  		if apierrors.IsNotFound(err) {
    576  			// If the control plane object in the reference does not exist, log and skip the check for an
    577  			// externally managed control plane
   578  			log.Error(err, "control plane object specified in cluster spec.controlPlaneRef does not exist", "kind", cluster.Spec.ControlPlaneRef.Kind, "name", cluster.Spec.ControlPlaneRef.Name)
   579  		} else {
   580  			if err != nil {
   581  				// If any other error occurs when trying to get the control plane object,
   582  				// return the error so we can retry
   583  				return err
   584  			}
   585  
   586  			// Return early if the object referenced by controlPlaneRef is being deleted.
   587  			if !controlPlane.GetDeletionTimestamp().IsZero() {
   588  				return errControlPlaneIsBeingDeleted
   589  			}
   590  
    591  			// Check if the ControlPlane is externally managed (AKS, EKS, GKE, etc.)
    592  			// and skip the following section if the control plane is externally managed,
    593  			// because there will be no control plane nodes registered.
   594  			if util.IsExternalManagedControlPlane(controlPlane) {
   595  				return nil
   596  			}
   597  		}
   598  	}
   599  
   600  	// Get all of the active machines that belong to this cluster.
   601  	machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
   602  	if err != nil {
   603  		return err
   604  	}
   605  
   606  	// Whether or not it is okay to delete the NodeRef depends on the
   607  	// number of remaining control plane members and whether or not this
   608  	// machine is one of them.
   609  	numControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
   610  	if numControlPlaneMachines == 0 {
   611  		// Do not delete the NodeRef if there are no remaining members of
   612  		// the control plane.
   613  		return errNoControlPlaneNodes
   614  	}
   615  	// Otherwise it is okay to delete the NodeRef.
   616  	return nil
   617  }
   618  
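         // drainNode cordons and drains the given Node in the workload cluster, using a client built from the
         // ClusterCacheTracker's REST config and the kubectl drain helper.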
   619  func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
   620  	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))
   621  
   622  	restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster))
   623  	if err != nil {
   624  		if errors.Is(err, remote.ErrClusterLocked) {
   625  			log.V(5).Info("Requeuing drain Node because another worker has the lock on the ClusterCacheTracker")
   626  			return ctrl.Result{RequeueAfter: time.Minute}, nil
   627  		}
   628  		log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry")
   629  		return ctrl.Result{}, nil
   630  	}
   631  	restConfig = rest.CopyConfig(restConfig)
   632  	restConfig.Timeout = r.NodeDrainClientTimeout
   633  	kubeClient, err := kubernetes.NewForConfig(restConfig)
   634  	if err != nil {
   635  		log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
   636  		return ctrl.Result{}, nil
   637  	}
   638  
   639  	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
   640  	if err != nil {
   641  		if apierrors.IsNotFound(err) {
   642  			// If an admin deletes the node directly, we'll end up here.
   643  			log.Error(err, "Could not find node from noderef, it may have already been deleted")
   644  			return ctrl.Result{}, nil
   645  		}
   646  		return ctrl.Result{}, errors.Wrapf(err, "unable to get node %v", nodeName)
   647  	}
   648  
   649  	drainer := &kubedrain.Helper{
   650  		Client:              kubeClient,
   651  		Ctx:                 ctx,
   652  		Force:               true,
   653  		IgnoreAllDaemonSets: true,
   654  		DeleteEmptyDirData:  true,
   655  		GracePeriodSeconds:  -1,
   656  		// If a pod is not evicted in 20 seconds, retry the eviction next time the
   657  		// machine gets reconciled again (to allow other machines to be reconciled).
   658  		Timeout: 20 * time.Second,
   659  		OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
   660  			verbStr := "Deleted"
   661  			if usingEviction {
   662  				verbStr = "Evicted"
   663  			}
   664  			log.Info(fmt.Sprintf("%s pod from Node", verbStr),
   665  				"Pod", klog.KObj(pod))
   666  		},
   667  		Out: writer{log.Info},
   668  		ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
   669  			log.Error(nil, msg, keysAndValues...)
   670  		}},
   671  	}
   672  
   673  	if noderefutil.IsNodeUnreachable(node) {
   674  		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
   675  		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
   676  	}
   677  
   678  	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {
   679  		// Machine will be re-reconciled after a cordon failure.
   680  		log.Error(err, "Cordon failed")
   681  		return ctrl.Result{}, errors.Wrapf(err, "unable to cordon node %v", node.Name)
   682  	}
   683  
   684  	if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil {
   685  		// Machine will be re-reconciled after a drain failure.
   686  		log.Error(err, "Drain failed, retry in 20s")
   687  		return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
   688  	}
   689  
   690  	log.Info("Drain successful")
   691  	return ctrl.Result{}, nil
   692  }
   693  
    694  // shouldWaitForNodeVolumes returns true if the node status still has volumes attached.
    695  // Pod deletion and volume detach happen asynchronously, so a pod can be deleted before its volumes are detached from the node.
    696  // This is a problem for some storage provisioners, for example vsphere-volume: if the node is deleted before the
    697  // detach succeeds, the underlying VMDK is deleted together with the Machine.
    698  // Therefore, after node draining we need to check that all volumes are detached before deleting the node.
   699  func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (bool, error) {
   700  	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))
   701  
   702  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   703  	if err != nil {
   704  		return true, err
   705  	}
   706  
   707  	node := &corev1.Node{}
   708  	if err := remoteClient.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
   709  		if apierrors.IsNotFound(err) {
   710  			log.Error(err, "Could not find node from noderef, it may have already been deleted")
   711  			return false, nil
   712  		}
   713  		return true, err
   714  	}
   715  
   716  	return len(node.Status.VolumesAttached) != 0, nil
   717  }
   718  
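         // deleteNode deletes the given Node from the workload cluster using a remote client obtained from the
         // ClusterCacheTracker.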
   719  func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
   720  	log := ctrl.LoggerFrom(ctx)
   721  
   722  	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
   723  	if err != nil {
   724  		if errors.Is(err, remote.ErrClusterLocked) {
   725  			return errors.Wrapf(err, "failed deleting Node because another worker has the lock on the ClusterCacheTracker")
   726  		}
   727  		log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry")
   728  		return nil
   729  	}
   730  
   731  	node := &corev1.Node{
   732  		ObjectMeta: metav1.ObjectMeta{
   733  			Name: name,
   734  		},
   735  	}
   736  
   737  	if err := remoteClient.Delete(ctx, node); err != nil {
   738  		return errors.Wrapf(err, "error deleting node %s", name)
   739  	}
   740  	return nil
   741  }
   742  
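         // reconcileDeleteBootstrap deletes the bootstrap config referenced by the Machine and returns true once the
         // object is gone; until then it mirrors the object's status into the BootstrapReadyCondition.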
   743  func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
   744  	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
   745  	if err != nil {
   746  		return false, err
   747  	}
   748  
   749  	if obj == nil {
   750  		// Marks the bootstrap as deleted
   751  		conditions.MarkFalse(m, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
   752  		return true, nil
   753  	}
   754  
   755  	// Report a summary of current status of the bootstrap object defined for this machine.
   756  	conditions.SetMirror(m, clusterv1.BootstrapReadyCondition,
   757  		conditions.UnstructuredGetter(obj),
   758  		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
   759  	)
   760  	return false, nil
   761  }
   762  
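         // reconcileDeleteInfrastructure deletes the infrastructure machine referenced by the Machine and returns true
         // once the object is gone; until then it mirrors the object's status into the InfrastructureReadyCondition.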
   763  func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
   764  	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
   765  	if err != nil {
   766  		return false, err
   767  	}
   768  
   769  	if obj == nil {
   770  		// Marks the infrastructure as deleted
   771  		conditions.MarkFalse(m, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
   772  		return true, nil
   773  	}
   774  
    775  	// Report a summary of current status of the infrastructure object defined for this machine.
   776  	conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition,
   777  		conditions.UnstructuredGetter(obj),
   778  		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
   779  	)
   780  	return false, nil
   781  }
   782  
   783  // reconcileDeleteExternal tries to delete external references.
   784  func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
   785  	if ref == nil {
   786  		return nil, nil
   787  	}
   788  
   789  	// get the external object
   790  	obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
   791  	if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
   792  		return nil, errors.Wrapf(err, "failed to get %s %q for Machine %q in namespace %q",
   793  			ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
   794  	}
   795  
   796  	if obj != nil {
    797  		// ensureExternalOwnershipAndWatch ensures that we set the object's OwnerReferences correctly and watch the object.
   798  		// The machine delete logic depends on reconciling the machine when the external objects are deleted.
   799  		// This avoids a race condition where the machine is deleted before the external objects are ever reconciled
   800  		// by this controller.
   801  		if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil {
   802  			return nil, err
   803  		}
   804  
   805  		// Issue a delete request.
   806  		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
   807  			return obj, errors.Wrapf(err,
   808  				"failed to delete %v %q for Machine %q in namespace %q",
   809  				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
   810  		}
   811  	}
   812  
    813  	// Return the external object; a nil object means there are no more external objects to delete.
   814  	return obj, nil
   815  }
   816  
   817  // shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster.
   818  func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
   819  	// if the machine is controlled by something (MS or KCP), or if it is a stand-alone machine directly owned by the Cluster, then no-op.
   820  	if metav1.GetControllerOf(m) != nil || util.HasOwner(m.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Cluster"}) {
   821  		return false
   822  	}
   823  
    824  	// Note: the following checks are required because, after a restore from a backup, both the Machine controller and the
    825  	// MachineSet, MachinePool, or ControlPlane controller race to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529
   826  
   827  	// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
   828  	if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
   829  		return false
   830  	}
   831  
   832  	// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
   833  	if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
   834  		return false
   835  	}
   836  
   837  	// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
   838  	if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
   839  		return false
   840  	}
   841  	return true
   842  }
   843  
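         // watchClusterNodes starts a watch on the workload cluster's Nodes (via the ClusterCacheTracker) once the
         // control plane is initialized, mapping Node events back to Machine reconcile requests.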
   844  func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
   845  	log := ctrl.LoggerFrom(ctx)
   846  
   847  	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
   848  		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
   849  		return nil
   850  	}
   851  
   852  	// If there is no tracker, don't watch remote nodes
   853  	if r.Tracker == nil {
   854  		return nil
   855  	}
   856  
   857  	return r.Tracker.Watch(ctx, remote.WatchInput{
   858  		Name:         "machine-watchNodes",
   859  		Cluster:      util.ObjectKey(cluster),
   860  		Watcher:      r.controller,
   861  		Kind:         &corev1.Node{},
   862  		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachine),
   863  	})
   864  }
   865  
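         // nodeToMachine maps a Node event to the Machine that owns the Node, matching first on status.nodeRef.name
         // and falling back to spec.providerID, optionally narrowed by the cluster-name and namespace annotations on the Node.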
   866  func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []reconcile.Request {
   867  	node, ok := o.(*corev1.Node)
   868  	if !ok {
   869  		panic(fmt.Sprintf("Expected a Node but got a %T", o))
   870  	}
   871  
   872  	var filters []client.ListOption
   873  	// Match by clusterName when the node has the annotation.
   874  	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
   875  		filters = append(filters, client.MatchingLabels{
   876  			clusterv1.ClusterNameLabel: clusterName,
   877  		})
   878  	}
   879  
   880  	// Match by namespace when the node has the annotation.
   881  	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
   882  		filters = append(filters, client.InNamespace(namespace))
   883  	}
   884  
   885  	// Match by nodeName and status.nodeRef.name.
   886  	machineList := &clusterv1.MachineList{}
   887  	if err := r.Client.List(
   888  		ctx,
   889  		machineList,
   890  		append(filters, client.MatchingFields{index.MachineNodeNameField: node.Name})...); err != nil {
   891  		return nil
   892  	}
   893  
   894  	// There should be exactly 1 Machine for the node.
   895  	if len(machineList.Items) == 1 {
   896  		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
   897  	}
   898  
    899  	// Otherwise, match by providerID. This is useful when e.g. the NodeRef has not been set yet.
   900  	// Match by providerID
   901  	if node.Spec.ProviderID == "" {
   902  		return nil
   903  	}
   904  	machineList = &clusterv1.MachineList{}
   905  	if err := r.Client.List(
   906  		ctx,
   907  		machineList,
   908  		append(filters, client.MatchingFields{index.MachineProviderIDField: node.Spec.ProviderID})...); err != nil {
   909  		return nil
   910  	}
   911  
   912  	// There should be exactly 1 Machine for the node.
   913  	if len(machineList.Items) == 1 {
   914  		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
   915  	}
   916  
   917  	return nil
   918  }
   919  
    920  // writer implements the io.Writer interface as a pass-through for klog.
   921  type writer struct {
   922  	logFunc func(msg string, keysAndValues ...interface{})
   923  }
   924  
   925  // Write passes string(p) into writer's logFunc and always returns len(p).
   926  func (w writer) Write(p []byte) (n int, err error) {
   927  	w.logFunc(string(p))
   928  	return len(p), nil
   929  }