sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machine/machine_controller.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machine

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	kubedrain "k8s.io/kubectl/pkg/drain"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/controllers/noderefutil"
	"sigs.k8s.io/cluster-api/controllers/remote"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/conditions"
	clog "sigs.k8s.io/cluster-api/util/log"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

var (
	errNilNodeRef                 = errors.New("noderef is nil")
	errLastControlPlaneNode       = errors.New("last control plane member")
	errNoControlPlaneNodes        = errors.New("no control plane members")
	errClusterIsBeingDeleted      = errors.New("cluster is being deleted")
	errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status;machines/finalizers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch

// Reconciler reconciles a Machine object.
type Reconciler struct {
	Client                    client.Client
	UnstructuredCachingClient client.Client
	APIReader                 client.Reader
	Tracker                   *remote.ClusterCacheTracker

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

	// NodeDrainClientTimeout is the timeout of the client used for draining nodes.
	NodeDrainClientTimeout time.Duration

	controller      controller.Controller
	recorder        record.EventRecorder
	externalTracker external.ObjectTracker

	// nodeDeletionRetryTimeout determines how long the controller will retry deleting a node
	// during a single reconciliation.
	nodeDeletionRetryTimeout time.Duration
	ssaCache                 ssa.Cache
}

// SetupWithManager sets up the controller with the Manager.
func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
	if err != nil {
		return err
	}
	msToMachines, err := util.MachineSetToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
	if err != nil {
		return err
	}
	mdToMachines, err := util.MachineDeploymentToObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
	if err != nil {
		return err
	}

	if r.nodeDeletionRetryTimeout.Nanoseconds() == 0 {
		r.nodeDeletionRetryTimeout = 10 * time.Second
	}

	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&clusterv1.Machine{}).
		WithOptions(options).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Watches(
			&clusterv1.Cluster{},
			handler.EnqueueRequestsFromMapFunc(clusterToMachines),
			builder.WithPredicates(
				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
				predicates.All(ctrl.LoggerFrom(ctx),
					predicates.Any(ctrl.LoggerFrom(ctx),
						predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
						predicates.ClusterControlPlaneInitialized(ctrl.LoggerFrom(ctx)),
					),
					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
				),
			)).
		Watches(
			&clusterv1.MachineSet{},
			handler.EnqueueRequestsFromMapFunc(msToMachines),
		).
		Watches(
			&clusterv1.MachineDeployment{},
			handler.EnqueueRequestsFromMapFunc(mdToMachines),
		).
		Build(r)
	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.controller = c
	r.recorder = mgr.GetEventRecorderFor("machine-controller")
	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	r.ssaCache = ssa.NewCache()
	return nil
}

// Reconcile reconciles the Machine referenced by the Request.
func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	// Fetch the Machine instance.
	m := &clusterv1.Machine{}
	if err := r.Client.Get(ctx, req.NamespacedName, m); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return ctrl.Result{}, nil
		}

		// Error reading the object - requeue the request.
		return ctrl.Result{}, err
	}

	// AddOwners adds the owners of Machine as k/v pairs to the logger.
	// Specifically, it will add KubeadmControlPlane, MachineSet and MachineDeployment.
	ctx, log, err := clog.AddOwners(ctx, r.Client, m)
	if err != nil {
		return ctrl.Result{}, err
	}

	log = log.WithValues("Cluster", klog.KRef(m.ObjectMeta.Namespace, m.Spec.ClusterName))
	ctx = ctrl.LoggerInto(ctx, log)

	cluster, err := util.GetClusterByName(ctx, r.Client, m.ObjectMeta.Namespace, m.Spec.ClusterName)
	if err != nil {
		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machine %q in namespace %q",
			m.Spec.ClusterName, m.Name, m.Namespace)
	}

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, m) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	// Initialize the patch helper.
	patchHelper, err := patch.NewHelper(m, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	defer func() {
		r.reconcilePhase(ctx, m)

		// Always attempt to patch the object and status after each reconciliation.
		// Patch ObservedGeneration only if the reconciliation completed successfully.
		patchOpts := []patch.Option{}
		if reterr == nil {
			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
		}
		if err := patchMachine(ctx, patchHelper, m, patchOpts...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Reconcile labels.
	if m.Labels == nil {
		m.Labels = make(map[string]string)
	}
	m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName

	// Handle deletion reconciliation loop.
	if !m.ObjectMeta.DeletionTimestamp.IsZero() {
		res, err := r.reconcileDelete(ctx, cluster, m)
		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
		// the current cluster because of concurrent access.
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{RequeueAfter: time.Minute}, nil
		}
		return res, err
	}

	// Add the finalizer first if not set, to avoid the race condition between init and delete.
	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
	if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) {
		controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer)
		return ctrl.Result{}, nil
	}

	// Handle normal reconciliation loop.
	res, err := r.reconcile(ctx, cluster, m)
	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
	// the current cluster because of concurrent access.
	if errors.Is(err, remote.ErrClusterLocked) {
		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
		return ctrl.Result{RequeueAfter: time.Minute}, nil
	}
	return res, err
}

func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
	// Always update the readyCondition by summarizing the state of other conditions.
	// A step counter is added to represent progress during the provisioning process; it is hidden
	// after provisioning - e.g. when an MHC condition exists - or during the deletion process.
	conditions.SetSummary(machine,
		conditions.WithConditions(
			// Infrastructure problems should take precedence over all the other conditions.
			clusterv1.InfrastructureReadyCondition,
			// Bootstrap comes after, but it is relevant only during initial machine provisioning.
			clusterv1.BootstrapReadyCondition,
			// The MHC reported condition should take precedence over the remediation progress.
			clusterv1.MachineHealthCheckSucceededCondition,
			clusterv1.MachineOwnerRemediatedCondition,
			clusterv1.DrainingSucceededCondition,
		),
		conditions.WithStepCounterIf(machine.ObjectMeta.DeletionTimestamp.IsZero() && machine.Spec.ProviderID == nil),
		conditions.WithStepCounterIfOnly(
			clusterv1.BootstrapReadyCondition,
			clusterv1.InfrastructureReadyCondition,
		),
	)

	// Patch the object, ignoring conflicts on the conditions owned by this controller.
	// Also, if requested, add additional options, e.g. patching ObservedGeneration, when issuing the
	// patch at the end of the reconcile loop.
	options = append(options,
		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
			clusterv1.ReadyCondition,
			clusterv1.BootstrapReadyCondition,
			clusterv1.InfrastructureReadyCondition,
			clusterv1.DrainingSucceededCondition,
			clusterv1.MachineHealthCheckSucceededCondition,
			clusterv1.MachineOwnerRemediatedCondition,
		}},
	)

	return patchHelper.Patch(ctx, machine, options...)
}

func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) {
	// If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly
	// owned by the Cluster (if not already present).
	if r.shouldAdopt(m) {
		m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{
			APIVersion: clusterv1.GroupVersion.String(),
			Kind:       "Cluster",
			Name:       cluster.Name,
			UID:        cluster.UID,
		}))
	}

	phases := []func(context.Context, *scope) (ctrl.Result, error){
		r.reconcileBootstrap,
		r.reconcileInfrastructure,
		r.reconcileNode,
		r.reconcileCertificateExpiry,
	}

	res := ctrl.Result{}
	errs := []error{}
	s := &scope{
		cluster: cluster,
		machine: m,
	}
	for _, phase := range phases {
		// Call the inner reconciliation methods.
		phaseResult, err := phase(ctx, s)
		if err != nil {
			errs = append(errs, err)
		}
		if len(errs) > 0 {
			continue
		}
		res = util.LowestNonZeroResult(res, phaseResult)
	}
	return res, kerrors.NewAggregate(errs)
}

// scope holds the different objects that are read and used during the reconcile.
type scope struct {
	// cluster is the Cluster object the Machine belongs to.
	// It is set at the beginning of the reconcile function.
	cluster *clusterv1.Cluster

	// machine is the Machine object. It is set at the beginning
	// of the reconcile function.
	machine *clusterv1.Machine

	// infraMachine is the Infrastructure Machine object that is referenced by the
	// Machine. It is set after reconcileInfrastructure is called.
	infraMachine *unstructured.Unstructured

	// bootstrapConfig is the BootstrapConfig object that is referenced by the
	// Machine. It is set after reconcileBootstrap is called.
	bootstrapConfig *unstructured.Unstructured
}

func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo
	log := ctrl.LoggerFrom(ctx)

	err := r.isDeleteNodeAllowed(ctx, cluster, m)
	isDeleteNodeAllowed := err == nil
	if err != nil {
		switch err {
		case errNoControlPlaneNodes, errLastControlPlaneNode, errNilNodeRef, errClusterIsBeingDeleted, errControlPlaneIsBeingDeleted:
			nodeName := ""
			if m.Status.NodeRef != nil {
				nodeName = m.Status.NodeRef.Name
			}
			log.Info("Skipping deletion of Kubernetes Node associated with Machine as it is not allowed", "Node", klog.KRef("", nodeName), "cause", err.Error())
		default:
			return ctrl.Result{}, errors.Wrapf(err, "failed to check if Kubernetes Node deletion is allowed")
		}
	}

	if isDeleteNodeAllowed {
		// pre-drain.delete lifecycle hook
		// Return early without error, will requeue if/when the hook owner removes the annotation.
		if annotations.HasWithPrefix(clusterv1.PreDrainDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
			conditions.MarkFalse(m, clusterv1.PreDrainDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
			return ctrl.Result{}, nil
		}
		conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)

		// Drain the node before deletion and issue a patch in order to make this operation visible to the users.
		if r.isNodeDrainAllowed(m) {
			patchHelper, err := patch.NewHelper(m, r.Client)
			if err != nil {
				return ctrl.Result{}, err
			}

			log.Info("Draining node", "Node", klog.KRef("", m.Status.NodeRef.Name))
			// The DrainingSucceededCondition never exists before the node is drained for the first time,
			// so its transition time can be used to record the first time draining.
			// This `if` condition prevents the transition time from being changed more than once.
			if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
				conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
			}

			if err := patchMachine(ctx, patchHelper, m); err != nil {
				return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
			}

			if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
				if err != nil {
					conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
				}
				return result, err
			}

			conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
			r.recorder.Eventf(m, corev1.EventTypeNormal, "SuccessfulDrainNode", "success draining Machine's node %q", m.Status.NodeRef.Name)
		}

		// After node draining is completed, and if isNodeVolumeDetachingAllowed returns True, make sure all
		// volumes are detached before proceeding to delete the Node.
		if r.isNodeVolumeDetachingAllowed(m) {
			// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
			// so its transition time can be used to record the first time we wait for volume detachment.
			// This `if` condition prevents the transition time from being changed more than once.
			if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
				conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
			}

			if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
				if err != nil {
					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
					return ctrl.Result{}, err
				}
				log.Info("Waiting for node volumes to be detached", "Node", klog.KRef("", m.Status.NodeRef.Name))
				return ctrl.Result{}, nil
			}
			conditions.MarkTrue(m, clusterv1.VolumeDetachSucceededCondition)
			r.recorder.Eventf(m, corev1.EventTypeNormal, "NodeVolumesDetached", "success waiting for node volumes detaching Machine's node %q", m.Status.NodeRef.Name)
		}
	}

	// pre-term.delete lifecycle hook
	// Return early without error, will requeue if/when the hook owner removes the annotation.
	if annotations.HasWithPrefix(clusterv1.PreTerminateDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
		conditions.MarkFalse(m, clusterv1.PreTerminateDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
		return ctrl.Result{}, nil
	}
	conditions.MarkTrue(m, clusterv1.PreTerminateDeleteHookSucceededCondition)

	// Return early and don't remove the finalizer if we got an error or
	// the external reconciliation deletion isn't ready.

	patchHelper, err := patch.NewHelper(m, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}
	conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
	if err := patchMachine(ctx, patchHelper, m); err != nil {
		conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "")
		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
	}

	infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m)
	if err != nil {
		return ctrl.Result{}, err
	}
	if !infrastructureDeleted {
		log.Info("Waiting for infrastructure to be deleted", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name))
		return ctrl.Result{}, nil
	}

	bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m)
	if err != nil {
		return ctrl.Result{}, err
	}
	if !bootstrapDeleted {
		log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name))
		return ctrl.Result{}, nil
	}

	// We only delete the node after the underlying infrastructure is gone.
	// https://github.com/kubernetes-sigs/cluster-api/issues/2565
	if isDeleteNodeAllowed {
		log.Info("Deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))

		var deleteNodeErr error
		waitErr := wait.PollUntilContextTimeout(ctx, 2*time.Second, r.nodeDeletionRetryTimeout, true, func(ctx context.Context) (bool, error) {
			if deleteNodeErr = r.deleteNode(ctx, cluster, m.Status.NodeRef.Name); deleteNodeErr != nil && !apierrors.IsNotFound(errors.Cause(deleteNodeErr)) {
				return false, nil
			}
			return true, nil
		})
		if waitErr != nil {
			log.Error(deleteNodeErr, "Timed out deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
			conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "")
			r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)

			// If the node deletion timeout is not expired yet, requeue the Machine for reconciliation.
			if m.Spec.NodeDeletionTimeout == nil || m.Spec.NodeDeletionTimeout.Nanoseconds() == 0 || m.DeletionTimestamp.Add(m.Spec.NodeDeletionTimeout.Duration).After(time.Now()) {
				return ctrl.Result{}, deleteNodeErr
			}
			log.Info("Node deletion timeout expired, continuing without Node deletion.")
		}
	}

	controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer)
	return ctrl.Result{}, nil
}

// isNodeDrainAllowed returns false if either the ExcludeNodeDrainingAnnotation annotation is set on the
// Machine OR the node drain timeout is exceeded, otherwise it returns true.
func (r *Reconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
		return false
	}

	if r.nodeDrainTimeoutExceeded(m) {
		return false
	}

	return true
}

// isNodeVolumeDetachingAllowed returns false if either the ExcludeWaitForNodeVolumeDetachAnnotation annotation is set OR
// the node volume detach timeout is exceeded, otherwise it returns true.
func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeWaitForNodeVolumeDetachAnnotation]; exists {
		return false
	}

	if r.nodeVolumeDetachTimeoutExceeded(m) {
		return false
	}

	return true
}

func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
	// if the NodeDrainTimeout is not set by the user
	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
		return false
	}

	// if the draining succeeded condition does not exist
	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
		return false
	}

	now := time.Now()
	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
	diff := now.Sub(firstTimeDrain.Time)
	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// nodeVolumeDetachTimeoutExceeded returns false if either NodeVolumeDetachTimeout is nil or <= 0, OR
// the VolumeDetachSucceededCondition is not set on the Machine. Otherwise it returns true if the timeout
// has expired since the last transition time of VolumeDetachSucceededCondition.
func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
	// if the NodeVolumeDetachTimeout is not set by the user
	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
		return false
	}

	// if the volume detaching succeeded condition does not exist
	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
		return false
	}

	now := time.Now()
	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
	diff := now.Sub(firstTimeDetach.Time)
	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
}

// isDeleteNodeAllowed returns nil only if the Machine's NodeRef is not nil
// and if the Machine is not the last control plane node in the cluster.
func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
	log := ctrl.LoggerFrom(ctx)
	// Return early if the cluster is being deleted.
	if !cluster.DeletionTimestamp.IsZero() {
		return errClusterIsBeingDeleted
	}

	// Cannot delete something that doesn't exist.
	if machine.Status.NodeRef == nil {
		return errNilNodeRef
	}

	// controlPlaneRef is an optional field in the Cluster, so skip the external
	// managed control plane check if it is nil.
	if cluster.Spec.ControlPlaneRef != nil {
		controlPlane, err := external.Get(ctx, r.Client, cluster.Spec.ControlPlaneRef, cluster.Spec.ControlPlaneRef.Namespace)
		if apierrors.IsNotFound(err) {
			// If the control plane object in the reference does not exist, log and skip the check for
			// an externally managed control plane.
			log.Error(err, "control plane object specified in cluster spec.controlPlaneRef does not exist", "kind", cluster.Spec.ControlPlaneRef.Kind, "name", cluster.Spec.ControlPlaneRef.Name)
		} else {
			if err != nil {
				// If any other error occurs when trying to get the control plane object,
				// return the error so we can retry.
				return err
			}

			// Return early if the object referenced by controlPlaneRef is being deleted.
			if !controlPlane.GetDeletionTimestamp().IsZero() {
				return errControlPlaneIsBeingDeleted
			}

			// Check if the ControlPlane is externally managed (AKS, EKS, GKE, etc)
			// and skip the following section if the control plane is externally managed,
			// because there will be no control plane nodes registered.
			if util.IsExternalManagedControlPlane(controlPlane) {
				return nil
			}
		}
	}

	// Get all of the active machines that belong to this cluster.
	machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
	if err != nil {
		return err
	}

	// Whether or not it is okay to delete the NodeRef depends on the
	// number of remaining control plane members and whether or not this
	// machine is one of them.
	numControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
	if numControlPlaneMachines == 0 {
		// Do not delete the NodeRef if there are no remaining members of
		// the control plane.
		return errNoControlPlaneNodes
	}
	// Otherwise it is okay to delete the NodeRef.
	return nil
}

func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))

	restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster))
	if err != nil {
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing drain Node because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{RequeueAfter: time.Minute}, nil
		}
		log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry")
		return ctrl.Result{}, nil
	}
	restConfig = rest.CopyConfig(restConfig)
	restConfig.Timeout = r.NodeDrainClientTimeout
	kubeClient, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
		return ctrl.Result{}, nil
	}

	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		if apierrors.IsNotFound(err) {
			// If an admin deletes the node directly, we'll end up here.
			log.Error(err, "Could not find node from noderef, it may have already been deleted")
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, errors.Wrapf(err, "unable to get node %v", nodeName)
	}

	drainer := &kubedrain.Helper{
		Client:              kubeClient,
		Ctx:                 ctx,
		Force:               true,
		IgnoreAllDaemonSets: true,
		DeleteEmptyDirData:  true,
		GracePeriodSeconds:  -1,
		// If a pod is not evicted in 20 seconds, retry the eviction next time the
		// machine gets reconciled again (to allow other machines to be reconciled).
		Timeout: 20 * time.Second,
		OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
			verbStr := "Deleted"
			if usingEviction {
				verbStr = "Evicted"
			}
			log.Info(fmt.Sprintf("%s pod from Node", verbStr),
				"Pod", klog.KObj(pod))
		},
		Out: writer{log.Info},
		ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
			log.Error(nil, msg, keysAndValues...)
		}},
	}

	if noderefutil.IsNodeUnreachable(node) {
		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
	}

	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {
		// Machine will be re-reconciled after a cordon failure.
		log.Error(err, "Cordon failed")
		return ctrl.Result{}, errors.Wrapf(err, "unable to cordon node %v", node.Name)
	}

	if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil {
		// Machine will be re-reconciled after a drain failure.
		log.Error(err, "Drain failed, retry in 20s")
		return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
	}

	log.Info("Drain successful")
	return ctrl.Result{}, nil
}

// shouldWaitForNodeVolumes returns true if the node status still has volumes attached.
// Pod deletion and volume detach happen asynchronously, so a pod could be deleted before its volumes are
// detached from the node. This is problematic for some storage provisioners, for example vsphere-volume:
// if the node is deleted before the detach succeeds, the underlying VMDK is deleted together with the Machine.
// So after node draining we need to check that all volumes are detached before deleting the node.
func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (bool, error) {
	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))

	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		return true, err
	}

	node := &corev1.Node{}
	if err := remoteClient.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
		if apierrors.IsNotFound(err) {
			log.Error(err, "Could not find node from noderef, it may have already been deleted")
			return false, nil
		}
		return true, err
	}

	return len(node.Status.VolumesAttached) != 0, nil
}

func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
	log := ctrl.LoggerFrom(ctx)

	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		if errors.Is(err, remote.ErrClusterLocked) {
			return errors.Wrapf(err, "failed deleting Node because another worker has the lock on the ClusterCacheTracker")
		}
		log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry")
		return nil
	}

	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
	}

	if err := remoteClient.Delete(ctx, node); err != nil {
		return errors.Wrapf(err, "error deleting node %s", name)
	}
	return nil
}

func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
	if err != nil {
		return false, err
	}

	if obj == nil {
		// Marks the bootstrap as deleted.
		conditions.MarkFalse(m, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		return true, nil
	}

	// Report a summary of the current status of the bootstrap object defined for this machine.
	conditions.SetMirror(m, clusterv1.BootstrapReadyCondition,
		conditions.UnstructuredGetter(obj),
		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
	)
	return false, nil
}

func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
	if err != nil {
		return false, err
	}

	if obj == nil {
		// Marks the infrastructure as deleted.
		conditions.MarkFalse(m, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		return true, nil
	}

	// Report a summary of the current status of the infrastructure object defined for this machine.
	conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition,
		conditions.UnstructuredGetter(obj),
		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
	)
	return false, nil
}

// reconcileDeleteExternal tries to delete external references.
func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
	if ref == nil {
		return nil, nil
	}

	// Get the external object.
	obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
	if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
		return nil, errors.Wrapf(err, "failed to get %s %q for Machine %q in namespace %q",
			ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
	}

	if obj != nil {
		// reconcileExternal ensures that we set the object's OwnerReferences correctly and watch the object.
		// The machine delete logic depends on reconciling the machine when the external objects are deleted.
		// This avoids a race condition where the machine is deleted before the external objects are ever reconciled
		// by this controller.
		if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil {
			return nil, err
		}

		// Issue a delete request.
		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
			return obj, errors.Wrapf(err,
				"failed to delete %v %q for Machine %q in namespace %q",
				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
		}
	}

	// Return the external object; it is nil if there are no more external objects.
	return obj, nil
}

// shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster.
func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
	// If the machine is controlled by something (MS or KCP), or if it is a stand-alone machine directly owned by the Cluster, then no-op.
	if metav1.GetControllerOf(m) != nil || util.HasOwner(m.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Cluster"}) {
		return false
	}

	// Note: the following checks are required because after restore from a backup both the Machine controller and the
	// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

	// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
		return false
	}

	// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
		return false
	}

	// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
		return false
	}
	return true
}

func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
	log := ctrl.LoggerFrom(ctx)

	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
		return nil
	}

	// If there is no tracker, don't watch remote nodes.
	if r.Tracker == nil {
		return nil
	}

	return r.Tracker.Watch(ctx, remote.WatchInput{
		Name:         "machine-watchNodes",
		Cluster:      util.ObjectKey(cluster),
		Watcher:      r.controller,
		Kind:         &corev1.Node{},
		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachine),
	})
}

func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []reconcile.Request {
	node, ok := o.(*corev1.Node)
	if !ok {
		panic(fmt.Sprintf("Expected a Node but got a %T", o))
	}

	var filters []client.ListOption
	// Match by clusterName when the node has the annotation.
	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
		filters = append(filters, client.MatchingLabels{
			clusterv1.ClusterNameLabel: clusterName,
		})
	}

	// Match by namespace when the node has the annotation.
	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
		filters = append(filters, client.InNamespace(namespace))
	}

	// Match by nodeName and status.nodeRef.name.
	machineList := &clusterv1.MachineList{}
	if err := r.Client.List(
		ctx,
		machineList,
		append(filters, client.MatchingFields{index.MachineNodeNameField: node.Name})...); err != nil {
		return nil
	}

	// There should be exactly 1 Machine for the node.
	if len(machineList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
	}

	// Otherwise match by providerID. This is useful when e.g. the NodeRef has not been set yet.
	if node.Spec.ProviderID == "" {
		return nil
	}
	machineList = &clusterv1.MachineList{}
	if err := r.Client.List(
		ctx,
		machineList,
		append(filters, client.MatchingFields{index.MachineProviderIDField: node.Spec.ProviderID})...); err != nil {
		return nil
	}

	// There should be exactly 1 Machine for the node.
	if len(machineList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
	}

	return nil
}

// writer implements io.Writer interface as a pass-through for klog.
type writer struct {
	logFunc func(msg string, keysAndValues ...interface{})
}

// Write passes string(p) into writer's logFunc and always returns len(p).
func (w writer) Write(p []byte) (n int, err error) {
	w.logFunc(string(p))
	return len(p), nil
}
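
// The helper below is an illustrative sketch, not part of the upstream file: it shows how this
// Reconciler is typically wired into a controller-runtime Manager. The function name
// setupMachineController, the externally constructed ClusterCacheTracker, and the concrete
// timeout/concurrency values are assumptions made for the example only.
func setupMachineController(ctx context.Context, mgr ctrl.Manager, tracker *remote.ClusterCacheTracker) error {
	r := &Reconciler{
		Client:                    mgr.GetClient(),
		UnstructuredCachingClient: mgr.GetClient(),
		APIReader:                 mgr.GetAPIReader(),
		Tracker:                   tracker,
		// Requests issued against the workload cluster while draining are bounded by this client timeout
		// (example value).
		NodeDrainClientTimeout: 10 * time.Minute,
	}
	// MaxConcurrentReconciles is an arbitrary example value.
	return r.SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10})
}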