sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/machine/machine_controller.go

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machine

import (
	"context"
	"fmt"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/record"
	"k8s.io/klog/v2"
	kubedrain "k8s.io/kubectl/pkg/drain"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/builder"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/api/v1beta1/index"
	"sigs.k8s.io/cluster-api/controllers/external"
	"sigs.k8s.io/cluster-api/controllers/noderefutil"
	"sigs.k8s.io/cluster-api/controllers/remote"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/annotations"
	"sigs.k8s.io/cluster-api/util/collections"
	"sigs.k8s.io/cluster-api/util/conditions"
	clog "sigs.k8s.io/cluster-api/util/log"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/cluster-api/util/predicates"
)

var (
	errNilNodeRef                 = errors.New("noderef is nil")
	errLastControlPlaneNode       = errors.New("last control plane member")
	errNoControlPlaneNodes        = errors.New("no control plane members")
	errClusterIsBeingDeleted      = errors.New("cluster is being deleted")
	errControlPlaneIsBeingDeleted = errors.New("control plane is being deleted")
)

// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status;machines/finalizers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch

// Reconciler reconciles a Machine object.
type Reconciler struct {
	Client                    client.Client
	UnstructuredCachingClient client.Client
	APIReader                 client.Reader
	Tracker                   *remote.ClusterCacheTracker

	// WatchFilterValue is the label value used to filter events prior to reconciliation.
	WatchFilterValue string

	// NodeDrainClientTimeout timeout of the client used for draining nodes.
	NodeDrainClientTimeout time.Duration

	controller      controller.Controller
	recorder        record.EventRecorder
	externalTracker external.ObjectTracker

	// nodeDeletionRetryTimeout determines how long the controller will retry deleting a node
	// during a single reconciliation.
	nodeDeletionRetryTimeout time.Duration
	ssaCache                 ssa.Cache
}

func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
	clusterToMachines, err := util.ClusterToTypedObjectsMapper(mgr.GetClient(), &clusterv1.MachineList{}, mgr.GetScheme())
	if err != nil {
		return err
	}

	if r.nodeDeletionRetryTimeout.Nanoseconds() == 0 {
		r.nodeDeletionRetryTimeout = 10 * time.Second
	}

	c, err := ctrl.NewControllerManagedBy(mgr).
		For(&clusterv1.Machine{}).
		WithOptions(options).
		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
		Watches(
			&clusterv1.Cluster{},
			handler.EnqueueRequestsFromMapFunc(clusterToMachines),
			builder.WithPredicates(
				// TODO: should this wait for Cluster.Status.InfrastructureReady similar to Infra Machine resources?
				predicates.All(ctrl.LoggerFrom(ctx),
					predicates.Any(ctrl.LoggerFrom(ctx),
						predicates.ClusterUnpaused(ctrl.LoggerFrom(ctx)),
						predicates.ClusterControlPlaneInitialized(ctrl.LoggerFrom(ctx)),
					),
					predicates.ResourceHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue),
				),
			)).
		Build(r)
	if err != nil {
		return errors.Wrap(err, "failed setting up with a controller manager")
	}

	r.controller = c
	r.recorder = mgr.GetEventRecorderFor("machine-controller")
	r.externalTracker = external.ObjectTracker{
		Controller: c,
		Cache:      mgr.GetCache(),
	}
	r.ssaCache = ssa.NewCache()
	return nil
}

func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
	// Fetch the Machine instance
	m := &clusterv1.Machine{}
	if err := r.Client.Get(ctx, req.NamespacedName, m); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return ctrl.Result{}, nil
		}

		// Error reading the object - requeue the request.
		return ctrl.Result{}, err
	}

	// AddOwners adds the owners of Machine as k/v pairs to the logger.
	// Specifically, it will add KubeadmControlPlane, MachineSet and MachineDeployment.
	ctx, log, err := clog.AddOwners(ctx, r.Client, m)
	if err != nil {
		return ctrl.Result{}, err
	}

	log = log.WithValues("Cluster", klog.KRef(m.ObjectMeta.Namespace, m.Spec.ClusterName))
	ctx = ctrl.LoggerInto(ctx, log)

	cluster, err := util.GetClusterByName(ctx, r.Client, m.ObjectMeta.Namespace, m.Spec.ClusterName)
	if err != nil {
		return ctrl.Result{}, errors.Wrapf(err, "failed to get cluster %q for machine %q in namespace %q",
			m.Spec.ClusterName, m.Name, m.Namespace)
	}

	// Return early if the object or Cluster is paused.
	if annotations.IsPaused(cluster, m) {
		log.Info("Reconciliation is paused for this object")
		return ctrl.Result{}, nil
	}

	// Initialize the patch helper
	patchHelper, err := patch.NewHelper(m, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}

	defer func() {
		r.reconcilePhase(ctx, m)

		// Always attempt to patch the object and status after each reconciliation.
		// Patch ObservedGeneration only if the reconciliation completed successfully.
		patchOpts := []patch.Option{}
		if reterr == nil {
			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
		}
		if err := patchMachine(ctx, patchHelper, m, patchOpts...); err != nil {
			reterr = kerrors.NewAggregate([]error{reterr, err})
		}
	}()

	// Reconcile labels.
	if m.Labels == nil {
		m.Labels = make(map[string]string)
	}
	m.Labels[clusterv1.ClusterNameLabel] = m.Spec.ClusterName

	// Handle deletion reconciliation loop.
	if !m.ObjectMeta.DeletionTimestamp.IsZero() {
		res, err := r.reconcileDelete(ctx, cluster, m)
		// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
		// the current cluster because of concurrent access.
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{Requeue: true}, nil
		}
		return res, err
	}

	// Add finalizer first if not set to avoid the race condition between init and delete.
	// Note: Finalizers in general can only be added when the deletionTimestamp is not set.
	if !controllerutil.ContainsFinalizer(m, clusterv1.MachineFinalizer) {
		controllerutil.AddFinalizer(m, clusterv1.MachineFinalizer)
		return ctrl.Result{}, nil
	}

	// Handle normal reconciliation loop.
	res, err := r.reconcile(ctx, cluster, m)
	// Requeue if the reconcile failed because the ClusterCacheTracker was locked for
	// the current cluster because of concurrent access.
	if errors.Is(err, remote.ErrClusterLocked) {
		log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
		return ctrl.Result{Requeue: true}, nil
	}
	return res, err
}

func patchMachine(ctx context.Context, patchHelper *patch.Helper, machine *clusterv1.Machine, options ...patch.Option) error {
	// Always update the readyCondition by summarizing the state of other conditions.
	// A step counter is added to represent progress during the provisioning process (instead we are hiding it
	// after provisioning - e.g. when a MHC condition exists - or during the deletion process).
	conditions.SetSummary(machine,
		conditions.WithConditions(
			// Infrastructure problems should take precedence over all the other conditions
			clusterv1.InfrastructureReadyCondition,
			// Bootstrap comes after, but it is relevant only during initial machine provisioning.
			clusterv1.BootstrapReadyCondition,
			// MHC reported condition should take precedence over the remediation progress
			clusterv1.MachineHealthCheckSucceededCondition,
			clusterv1.MachineOwnerRemediatedCondition,
		),
		conditions.WithStepCounterIf(machine.ObjectMeta.DeletionTimestamp.IsZero() && machine.Spec.ProviderID == nil),
		conditions.WithStepCounterIfOnly(
			clusterv1.BootstrapReadyCondition,
			clusterv1.InfrastructureReadyCondition,
		),
	)

	// Patch the object, ignoring conflicts on the conditions owned by this controller.
	// Also, if requested, we are adding additional options like e.g. Patch ObservedGeneration when issuing the
	// patch at the end of the reconcile loop.
	options = append(options,
		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
			clusterv1.ReadyCondition,
			clusterv1.BootstrapReadyCondition,
			clusterv1.InfrastructureReadyCondition,
			clusterv1.DrainingSucceededCondition,
			clusterv1.MachineHealthCheckSucceededCondition,
			clusterv1.MachineOwnerRemediatedCondition,
		}},
	)

	return patchHelper.Patch(ctx, machine, options...)
}

func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) {
	// If the machine is a stand-alone one, meaning not originated from a MachineDeployment, then set it as directly
	// owned by the Cluster (if not already present).
	if r.shouldAdopt(m) {
		m.SetOwnerReferences(util.EnsureOwnerRef(m.GetOwnerReferences(), metav1.OwnerReference{
			APIVersion: clusterv1.GroupVersion.String(),
			Kind:       "Cluster",
			Name:       cluster.Name,
			UID:        cluster.UID,
		}))
	}

	phases := []func(context.Context, *scope) (ctrl.Result, error){
		r.reconcileBootstrap,
		r.reconcileInfrastructure,
		r.reconcileNode,
		r.reconcileCertificateExpiry,
	}

	res := ctrl.Result{}
	errs := []error{}
	s := &scope{
		cluster: cluster,
		machine: m,
	}
	for _, phase := range phases {
		// Call the inner reconciliation methods.
		phaseResult, err := phase(ctx, s)
		if err != nil {
			errs = append(errs, err)
		}
		if len(errs) > 0 {
			continue
		}
		res = util.LowestNonZeroResult(res, phaseResult)
	}
	return res, kerrors.NewAggregate(errs)
}

// scope holds the different objects that are read and used during the reconcile.
type scope struct {
	// cluster is the Cluster object the Machine belongs to.
	// It is set at the beginning of the reconcile function.
	cluster *clusterv1.Cluster

	// machine is the Machine object. It is set at the beginning
	// of the reconcile function.
	machine *clusterv1.Machine

	// infraMachine is the Infrastructure Machine object that is referenced by the
	// Machine. It is set after reconcileInfrastructure is called.
	infraMachine *unstructured.Unstructured

	// bootstrapConfig is the BootstrapConfig object that is referenced by the
	// Machine. It is set after reconcileBootstrap is called.
	bootstrapConfig *unstructured.Unstructured
}

func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (ctrl.Result, error) { //nolint:gocyclo
	log := ctrl.LoggerFrom(ctx)

	err := r.isDeleteNodeAllowed(ctx, cluster, m)
	isDeleteNodeAllowed := err == nil
	if err != nil {
		switch err {
		case errNoControlPlaneNodes, errLastControlPlaneNode, errNilNodeRef, errClusterIsBeingDeleted, errControlPlaneIsBeingDeleted:
			nodeName := ""
			if m.Status.NodeRef != nil {
				nodeName = m.Status.NodeRef.Name
			}
			log.Info("Deleting Kubernetes Node associated with Machine is not allowed", "Node", klog.KRef("", nodeName), "cause", err.Error())
		default:
			return ctrl.Result{}, errors.Wrapf(err, "failed to check if Kubernetes Node deletion is allowed")
		}
	}

	if isDeleteNodeAllowed {
		// pre-drain.delete lifecycle hook
		// Return early without error, will requeue if/when the hook owner removes the annotation.
		if annotations.HasWithPrefix(clusterv1.PreDrainDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
			conditions.MarkFalse(m, clusterv1.PreDrainDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
			return ctrl.Result{}, nil
		}
		conditions.MarkTrue(m, clusterv1.PreDrainDeleteHookSucceededCondition)

		// Drain node before deletion and issue a patch in order to make this operation visible to the users.
		if r.isNodeDrainAllowed(m) {
			patchHelper, err := patch.NewHelper(m, r.Client)
			if err != nil {
				return ctrl.Result{}, err
			}

			log.Info("Draining node", "Node", klog.KRef("", m.Status.NodeRef.Name))
			// The DrainingSucceededCondition never exists before the node is drained for the first time,
			// so its transition time can be used to record the first time draining.
			// This `if` condition prevents the transition time to be changed more than once.
			if conditions.Get(m, clusterv1.DrainingSucceededCondition) == nil {
				conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion")
			}

			if err := patchMachine(ctx, patchHelper, m); err != nil {
				return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
			}

			if result, err := r.drainNode(ctx, cluster, m.Status.NodeRef.Name); !result.IsZero() || err != nil {
				if err != nil {
					conditions.MarkFalse(m, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDrainNode", "error draining Machine's node %q: %v", m.Status.NodeRef.Name, err)
				}
				return result, err
			}

			conditions.MarkTrue(m, clusterv1.DrainingSucceededCondition)
			r.recorder.Eventf(m, corev1.EventTypeNormal, "SuccessfulDrainNode", "success draining Machine's node %q", m.Status.NodeRef.Name)
		}

		// After node draining is completed, and if isNodeVolumeDetachingAllowed returns True, make sure all
		// volumes are detached before proceeding to delete the Node.
		if r.isNodeVolumeDetachingAllowed(m) {
			// The VolumeDetachSucceededCondition never exists before we wait for volume detachment for the first time,
			// so its transition time can be used to record the first time we wait for volume detachment.
			// This `if` condition prevents the transition time to be changed more than once.
			if conditions.Get(m, clusterv1.VolumeDetachSucceededCondition) == nil {
				conditions.MarkFalse(m, clusterv1.VolumeDetachSucceededCondition, clusterv1.WaitingForVolumeDetachReason, clusterv1.ConditionSeverityInfo, "Waiting for node volumes to be detached")
			}

			if ok, err := r.shouldWaitForNodeVolumes(ctx, cluster, m.Status.NodeRef.Name); ok || err != nil {
				if err != nil {
					r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedWaitForVolumeDetach", "error waiting for node volumes detaching, Machine's node %q: %v", m.Status.NodeRef.Name, err)
					return ctrl.Result{}, err
				}
				log.Info("Waiting for node volumes to be detached", "Node", klog.KRef("", m.Status.NodeRef.Name))
				return ctrl.Result{}, nil
			}
			conditions.MarkTrue(m, clusterv1.VolumeDetachSucceededCondition)
			r.recorder.Eventf(m, corev1.EventTypeNormal, "NodeVolumesDetached", "success waiting for node volumes detaching Machine's node %q", m.Status.NodeRef.Name)
		}
	}

	// pre-term.delete lifecycle hook
	// Return early without error, will requeue if/when the hook owner removes the annotation.
	if annotations.HasWithPrefix(clusterv1.PreTerminateDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) {
		conditions.MarkFalse(m, clusterv1.PreTerminateDeleteHookSucceededCondition, clusterv1.WaitingExternalHookReason, clusterv1.ConditionSeverityInfo, "")
		return ctrl.Result{}, nil
	}
	conditions.MarkTrue(m, clusterv1.PreTerminateDeleteHookSucceededCondition)

	// Return early and don't remove the finalizer if we got an error or
	// the external reconciliation deletion isn't ready.

	patchHelper, err := patch.NewHelper(m, r.Client)
	if err != nil {
		return ctrl.Result{}, err
	}
	conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "")
	if err := patchMachine(ctx, patchHelper, m); err != nil {
		conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityInfo, "")
		return ctrl.Result{}, errors.Wrap(err, "failed to patch Machine")
	}

	infrastructureDeleted, err := r.reconcileDeleteInfrastructure(ctx, cluster, m)
	if err != nil {
		return ctrl.Result{}, err
	}
	if !infrastructureDeleted {
		log.Info("Waiting for infrastructure to be deleted", m.Spec.InfrastructureRef.Kind, klog.KRef(m.Spec.InfrastructureRef.Namespace, m.Spec.InfrastructureRef.Name))
		return ctrl.Result{}, nil
	}

	bootstrapDeleted, err := r.reconcileDeleteBootstrap(ctx, cluster, m)
	if err != nil {
		return ctrl.Result{}, err
	}
	if !bootstrapDeleted {
		log.Info("Waiting for bootstrap to be deleted", m.Spec.Bootstrap.ConfigRef.Kind, klog.KRef(m.Spec.Bootstrap.ConfigRef.Namespace, m.Spec.Bootstrap.ConfigRef.Name))
		return ctrl.Result{}, nil
	}

	// We only delete the node after the underlying infrastructure is gone.
	// https://github.com/kubernetes-sigs/cluster-api/issues/2565
	if isDeleteNodeAllowed {
		log.Info("Deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))

		var deleteNodeErr error
		waitErr := wait.PollUntilContextTimeout(ctx, 2*time.Second, r.nodeDeletionRetryTimeout, true, func(ctx context.Context) (bool, error) {
			if deleteNodeErr = r.deleteNode(ctx, cluster, m.Status.NodeRef.Name); deleteNodeErr != nil && !apierrors.IsNotFound(errors.Cause(deleteNodeErr)) {
				return false, nil
			}
			return true, nil
		})
		if waitErr != nil {
			log.Error(deleteNodeErr, "Timed out deleting node", "Node", klog.KRef("", m.Status.NodeRef.Name))
			conditions.MarkFalse(m, clusterv1.MachineNodeHealthyCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, "")
			r.recorder.Eventf(m, corev1.EventTypeWarning, "FailedDeleteNode", "error deleting Machine's node: %v", deleteNodeErr)

			// If the node deletion timeout is not expired yet, requeue the Machine for reconciliation.
			if m.Spec.NodeDeletionTimeout == nil || m.Spec.NodeDeletionTimeout.Nanoseconds() == 0 || m.DeletionTimestamp.Add(m.Spec.NodeDeletionTimeout.Duration).After(time.Now()) {
				return ctrl.Result{}, deleteNodeErr
			}
			log.Info("Node deletion timeout expired, continuing without Node deletion.")
		}
	}

	controllerutil.RemoveFinalizer(m, clusterv1.MachineFinalizer)
	return ctrl.Result{}, nil
}

func (r *Reconciler) isNodeDrainAllowed(m *clusterv1.Machine) bool {
	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists {
		return false
	}

	if r.nodeDrainTimeoutExceeded(m) {
		return false
	}

	return true
}

// isNodeVolumeDetachingAllowed returns False if either ExcludeWaitForNodeVolumeDetachAnnotation annotation is set OR
// nodeVolumeDetachTimeoutExceeded timeout is exceeded, otherwise returns True.
func (r *Reconciler) isNodeVolumeDetachingAllowed(m *clusterv1.Machine) bool {
	if _, exists := m.ObjectMeta.Annotations[clusterv1.ExcludeWaitForNodeVolumeDetachAnnotation]; exists {
		return false
	}

	if r.nodeVolumeDetachTimeoutExceeded(m) {
		return false
	}

	return true
}

func (r *Reconciler) nodeDrainTimeoutExceeded(machine *clusterv1.Machine) bool {
	// if the NodeDrainTimeout type is not set by user
	if machine.Spec.NodeDrainTimeout == nil || machine.Spec.NodeDrainTimeout.Seconds() <= 0 {
		return false
	}

	// if the draining succeeded condition does not exist
	if conditions.Get(machine, clusterv1.DrainingSucceededCondition) == nil {
		return false
	}

	now := time.Now()
	firstTimeDrain := conditions.GetLastTransitionTime(machine, clusterv1.DrainingSucceededCondition)
	diff := now.Sub(firstTimeDrain.Time)
	return diff.Seconds() >= machine.Spec.NodeDrainTimeout.Seconds()
}

// nodeVolumeDetachTimeoutExceeded returns False if either NodeVolumeDetachTimeout is set to nil or <=0 OR
// VolumeDetachSucceededCondition is not set on the Machine. Otherwise returns true if the timeout is expired
// since the last transition time of VolumeDetachSucceededCondition.
func (r *Reconciler) nodeVolumeDetachTimeoutExceeded(machine *clusterv1.Machine) bool {
	// if the NodeVolumeDetachTimeout type is not set by user
	if machine.Spec.NodeVolumeDetachTimeout == nil || machine.Spec.NodeVolumeDetachTimeout.Seconds() <= 0 {
		return false
	}

	// if the volume detaching succeeded condition does not exist
	if conditions.Get(machine, clusterv1.VolumeDetachSucceededCondition) == nil {
		return false
	}

	now := time.Now()
	firstTimeDetach := conditions.GetLastTransitionTime(machine, clusterv1.VolumeDetachSucceededCondition)
	diff := now.Sub(firstTimeDetach.Time)
	return diff.Seconds() >= machine.Spec.NodeVolumeDetachTimeout.Seconds()
}

// isDeleteNodeAllowed returns nil only if the Machine's NodeRef is not nil
// and if the Machine is not the last control plane node in the cluster.
func (r *Reconciler) isDeleteNodeAllowed(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine) error {
	log := ctrl.LoggerFrom(ctx)
	// Return early if the cluster is being deleted.
	if !cluster.DeletionTimestamp.IsZero() {
		return errClusterIsBeingDeleted
	}

	// Cannot delete something that doesn't exist.
	if machine.Status.NodeRef == nil {
		return errNilNodeRef
	}

	// controlPlaneRef is an optional field in the Cluster so skip the external
	// managed control plane check if it is nil
	if cluster.Spec.ControlPlaneRef != nil {
		controlPlane, err := external.Get(ctx, r.Client, cluster.Spec.ControlPlaneRef, cluster.Spec.ControlPlaneRef.Namespace)
		if apierrors.IsNotFound(err) {
			// If control plane object in the reference does not exist, log and skip check for
			// external managed control plane
			log.Error(err, "control plane object specified in cluster spec.controlPlaneRef does not exist", "kind", cluster.Spec.ControlPlaneRef.Kind, "name", cluster.Spec.ControlPlaneRef.Name)
		} else {
			if err != nil {
				// If any other error occurs when trying to get the control plane object,
				// return the error so we can retry
				return err
			}

			// Return early if the object referenced by controlPlaneRef is being deleted.
			if !controlPlane.GetDeletionTimestamp().IsZero() {
				return errControlPlaneIsBeingDeleted
			}

			// Check if the ControlPlane is externally managed (AKS, EKS, GKE, etc)
			// and skip the following section if control plane is externally managed
			// because there will be no control plane nodes registered
			if util.IsExternalManagedControlPlane(controlPlane) {
				return nil
			}
		}
	}

	// Get all of the active machines that belong to this cluster.
	machines, err := collections.GetFilteredMachinesForCluster(ctx, r.Client, cluster, collections.ActiveMachines)
	if err != nil {
		return err
	}

	// Whether or not it is okay to delete the NodeRef depends on the
	// number of remaining control plane members and whether or not this
	// machine is one of them.
	numControlPlaneMachines := len(machines.Filter(collections.ControlPlaneMachines(cluster.Name)))
	if numControlPlaneMachines == 0 {
		// Do not delete the NodeRef if there are no remaining members of
		// the control plane.
		return errNoControlPlaneNodes
	}
	// Otherwise it is okay to delete the NodeRef.
	return nil
}

func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (ctrl.Result, error) {
	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))

	restConfig, err := r.Tracker.GetRESTConfig(ctx, util.ObjectKey(cluster))
	if err != nil {
		if errors.Is(err, remote.ErrClusterLocked) {
			log.V(5).Info("Requeuing drain Node because another worker has the lock on the ClusterCacheTracker")
			return ctrl.Result{Requeue: true}, nil
		}
		log.Error(err, "Error creating a remote client for cluster while draining Node, won't retry")
		return ctrl.Result{}, nil
	}
	restConfig = rest.CopyConfig(restConfig)
	restConfig.Timeout = r.NodeDrainClientTimeout
	kubeClient, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		log.Error(err, "Error creating a remote client while deleting Machine, won't retry")
		return ctrl.Result{}, nil
	}

	node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
	if err != nil {
		if apierrors.IsNotFound(err) {
			// If an admin deletes the node directly, we'll end up here.
			log.Error(err, "Could not find node from noderef, it may have already been deleted")
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, errors.Wrapf(err, "unable to get node %v", nodeName)
	}

	drainer := &kubedrain.Helper{
		Client:              kubeClient,
		Ctx:                 ctx,
		Force:               true,
		IgnoreAllDaemonSets: true,
		DeleteEmptyDirData:  true,
		GracePeriodSeconds:  -1,
		// If a pod is not evicted in 20 seconds, retry the eviction next time the
		// machine gets reconciled again (to allow other machines to be reconciled).
		Timeout: 20 * time.Second,
		OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) {
			verbStr := "Deleted"
			if usingEviction {
				verbStr = "Evicted"
			}
			log.Info(fmt.Sprintf("%s pod from Node", verbStr),
				"Pod", klog.KObj(pod))
		},
		Out: writer{log.Info},
		ErrOut: writer{func(msg string, keysAndValues ...interface{}) {
			log.Error(nil, msg, keysAndValues...)
		}},
	}

	if noderefutil.IsNodeUnreachable(node) {
		// When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them.
		drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes
	}

	if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil {
		// Machine will be re-reconciled after a cordon failure.
		log.Error(err, "Cordon failed")
		return ctrl.Result{}, errors.Wrapf(err, "unable to cordon node %v", node.Name)
	}

	if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil {
		// Machine will be re-reconciled after a drain failure.
		log.Error(err, "Drain failed, retry in 20s")
		return ctrl.Result{RequeueAfter: 20 * time.Second}, nil
	}

	log.Info("Drain successful")
	return ctrl.Result{}, nil
}

// shouldWaitForNodeVolumes returns true if the node status still has volumes attached.
// Pod deletion and volume detach happen asynchronously, so a pod could be deleted before its volume is
// detached from the node. This is problematic for some storage provisioners, for example vsphere-volume:
// if the node is deleted before detach succeeds, the underlying VMDK is deleted together with the Machine,
// so after node draining we need to check that all volumes are detached before deleting the node.
func (r *Reconciler) shouldWaitForNodeVolumes(ctx context.Context, cluster *clusterv1.Cluster, nodeName string) (bool, error) {
	log := ctrl.LoggerFrom(ctx, "Node", klog.KRef("", nodeName))

	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		return true, err
	}

	node := &corev1.Node{}
	if err := remoteClient.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
		if apierrors.IsNotFound(err) {
			log.Error(err, "Could not find node from noderef, it may have already been deleted")
			return false, nil
		}
		return true, err
	}

	return len(node.Status.VolumesAttached) != 0, nil
}

func (r *Reconciler) deleteNode(ctx context.Context, cluster *clusterv1.Cluster, name string) error {
	log := ctrl.LoggerFrom(ctx)

	remoteClient, err := r.Tracker.GetClient(ctx, util.ObjectKey(cluster))
	if err != nil {
		if errors.Is(err, remote.ErrClusterLocked) {
			return errors.Wrapf(err, "failed deleting Node because another worker has the lock on the ClusterCacheTracker")
		}
		log.Error(err, "Error creating a remote client for cluster while deleting Node, won't retry")
		return nil
	}

	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
	}

	if err := remoteClient.Delete(ctx, node); err != nil {
		return errors.Wrapf(err, "error deleting node %s", name)
	}
	return nil
}

func (r *Reconciler) reconcileDeleteBootstrap(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, m.Spec.Bootstrap.ConfigRef)
	if err != nil {
		return false, err
	}

	if obj == nil {
		// Marks the bootstrap as deleted
		conditions.MarkFalse(m, clusterv1.BootstrapReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		return true, nil
	}

	// Report a summary of current status of the bootstrap object defined for this machine.
	conditions.SetMirror(m, clusterv1.BootstrapReadyCondition,
		conditions.UnstructuredGetter(obj),
		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
	)
	return false, nil
}

func (r *Reconciler) reconcileDeleteInfrastructure(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine) (bool, error) {
	obj, err := r.reconcileDeleteExternal(ctx, cluster, m, &m.Spec.InfrastructureRef)
	if err != nil {
		return false, err
	}

	if obj == nil {
		// Marks the infrastructure as deleted
		conditions.MarkFalse(m, clusterv1.InfrastructureReadyCondition, clusterv1.DeletedReason, clusterv1.ConditionSeverityInfo, "")
		return true, nil
	}

	// Report a summary of current status of the infrastructure object defined for this machine.
	conditions.SetMirror(m, clusterv1.InfrastructureReadyCondition,
		conditions.UnstructuredGetter(obj),
		conditions.WithFallbackValue(false, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, ""),
	)
	return false, nil
}

// reconcileDeleteExternal tries to delete external references.
func (r *Reconciler) reconcileDeleteExternal(ctx context.Context, cluster *clusterv1.Cluster, m *clusterv1.Machine, ref *corev1.ObjectReference) (*unstructured.Unstructured, error) {
	if ref == nil {
		return nil, nil
	}

	// get the external object
	obj, err := external.Get(ctx, r.UnstructuredCachingClient, ref, m.Namespace)
	if err != nil && !apierrors.IsNotFound(errors.Cause(err)) {
		return nil, errors.Wrapf(err, "failed to get %s %q for Machine %q in namespace %q",
			ref.GroupVersionKind(), ref.Name, m.Name, m.Namespace)
	}

	if obj != nil {
		// reconcileExternal ensures that we set the object's OwnerReferences correctly and watch the object.
		// The machine delete logic depends on reconciling the machine when the external objects are deleted.
		// This avoids a race condition where the machine is deleted before the external objects are ever reconciled
		// by this controller.
		if _, err := r.ensureExternalOwnershipAndWatch(ctx, cluster, m, ref); err != nil {
			return nil, err
		}

		// Issue a delete request.
		if err := r.Client.Delete(ctx, obj); err != nil && !apierrors.IsNotFound(err) {
			return obj, errors.Wrapf(err,
				"failed to delete %v %q for Machine %q in namespace %q",
				obj.GroupVersionKind(), obj.GetName(), m.Name, m.Namespace)
		}
	}

	// Return true if there are no more external objects.
	return obj, nil
}

// shouldAdopt returns true if the Machine should be adopted as a stand-alone Machine directly owned by the Cluster.
func (r *Reconciler) shouldAdopt(m *clusterv1.Machine) bool {
	// if the machine is controlled by something (MS or KCP), or if it is a stand-alone machine directly owned by the Cluster, then no-op.
	if metav1.GetControllerOf(m) != nil || util.HasOwner(m.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Cluster"}) {
		return false
	}

	// Note: following checks are required because after restore from a backup both the Machine controller and the
	// MachineSet, MachinePool, or ControlPlane controller are racing to adopt Machines, see https://github.com/kubernetes-sigs/cluster-api/issues/7529

	// If the Machine is originated by a MachineSet, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachineSetNameLabel]; ok {
		return false
	}

	// If the Machine is originated by a MachinePool object, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachinePoolNameLabel]; ok {
		return false
	}

	// If the Machine is originated by a ControlPlane object, it should not be adopted directly by the Cluster as a stand-alone Machine.
	if _, ok := m.Labels[clusterv1.MachineControlPlaneNameLabel]; ok {
		return false
	}
	return true
}

func (r *Reconciler) watchClusterNodes(ctx context.Context, cluster *clusterv1.Cluster) error {
	log := ctrl.LoggerFrom(ctx)

	if !conditions.IsTrue(cluster, clusterv1.ControlPlaneInitializedCondition) {
		log.V(5).Info("Skipping node watching setup because control plane is not initialized")
		return nil
	}

	// If there is no tracker, don't watch remote nodes
	if r.Tracker == nil {
		return nil
	}

	return r.Tracker.Watch(ctx, remote.WatchInput{
		Name:         "machine-watchNodes",
		Cluster:      util.ObjectKey(cluster),
		Watcher:      r.controller,
		Kind:         &corev1.Node{},
		EventHandler: handler.EnqueueRequestsFromMapFunc(r.nodeToMachine),
	})
}

func (r *Reconciler) nodeToMachine(ctx context.Context, o client.Object) []reconcile.Request {
	node, ok := o.(*corev1.Node)
	if !ok {
		panic(fmt.Sprintf("Expected a Node but got a %T", o))
	}

	var filters []client.ListOption
	// Match by clusterName when the node has the annotation.
	if clusterName, ok := node.GetAnnotations()[clusterv1.ClusterNameAnnotation]; ok {
		filters = append(filters, client.MatchingLabels{
			clusterv1.ClusterNameLabel: clusterName,
		})
	}

	// Match by namespace when the node has the annotation.
	if namespace, ok := node.GetAnnotations()[clusterv1.ClusterNamespaceAnnotation]; ok {
		filters = append(filters, client.InNamespace(namespace))
	}

	// Match by nodeName and status.nodeRef.name.
	machineList := &clusterv1.MachineList{}
	if err := r.Client.List(
		ctx,
		machineList,
		append(filters, client.MatchingFields{index.MachineNodeNameField: node.Name})...); err != nil {
		return nil
	}

	// There should be exactly 1 Machine for the node.
	if len(machineList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
	}

	// Otherwise let's match by providerID. This is useful when e.g the NodeRef has not been set yet.
	// Match by providerID
	if node.Spec.ProviderID == "" {
		return nil
	}
	machineList = &clusterv1.MachineList{}
	if err := r.Client.List(
		ctx,
		machineList,
		append(filters, client.MatchingFields{index.MachineProviderIDField: node.Spec.ProviderID})...); err != nil {
		return nil
	}

	// There should be exactly 1 Machine for the node.
	if len(machineList.Items) == 1 {
		return []reconcile.Request{{NamespacedName: util.ObjectKey(&machineList.Items[0])}}
	}

	return nil
}

// writer implements io.Writer interface as a pass-through for klog.
type writer struct {
	logFunc func(msg string, keysAndValues ...interface{})
}

// Write passes string(p) into writer's logFunc and always returns len(p).
func (w writer) Write(p []byte) (n int, err error) {
	w.logFunc(string(p))
	return len(p), nil
}
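
// Registration sketch (not part of the upstream file): a minimal example of how this Reconciler
// might be wired into a controller-runtime Manager by a hypothetical setup/main package. The names
// mgr, tracker, watchFilterValue, setupLog and ctx are assumed to exist in that caller; only the
// exported fields and SetupWithManager defined above are relied on.
//
//	if err := (&machine.Reconciler{
//		Client:                    mgr.GetClient(),
//		UnstructuredCachingClient: mgr.GetClient(), // simplified; a dedicated caching client could be used instead
//		APIReader:                 mgr.GetAPIReader(),
//		Tracker:                   tracker, // *remote.ClusterCacheTracker for the management cluster
//		WatchFilterValue:          watchFilterValue,
//		NodeDrainClientTimeout:    10 * time.Second,
//	}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 10}); err != nil {
//		setupLog.Error(err, "Unable to create controller", "controller", "Machine")
//		os.Exit(1)
//	}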