sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/topology/cluster/reconcile_state.go

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package cluster

import (
	"context"
	"fmt"
	"strings"
	"time"

	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/validation/field"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/apiserver/pkg/storage/names"
	"k8s.io/klog/v2"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
	"sigs.k8s.io/cluster-api/feature"
	"sigs.k8s.io/cluster-api/internal/contract"
	"sigs.k8s.io/cluster-api/internal/controllers/topology/cluster/scope"
	"sigs.k8s.io/cluster-api/internal/controllers/topology/cluster/structuredmerge"
	"sigs.k8s.io/cluster-api/internal/hooks"
	tlog "sigs.k8s.io/cluster-api/internal/log"
	"sigs.k8s.io/cluster-api/internal/topology/check"
)

const (
	createEventReason = "TopologyCreate"
	updateEventReason = "TopologyUpdate"
	deleteEventReason = "TopologyDelete"
)

// reconcileState reconciles the current and desired state of the managed Cluster topology.
// NOTE: We are assuming all the required objects are provided as input; also, in case of any error,
// the entire reconcile operation will fail. This might be improved in the future if support for reconciling
// a subset of a topology is implemented.
func (r *Reconciler) reconcileState(ctx context.Context, s *scope.Scope) error {
	log := tlog.LoggerFrom(ctx)
	log.Infof("Reconciling state for topology owned objects")

	// Reconcile the Cluster shim, a temporary object used as a means to collect
	// objects/templates that can be orphaned in case of errors during the
	// remaining part of the reconcile process.
	if err := r.reconcileClusterShim(ctx, s); err != nil {
		return err
	}

	if feature.Gates.Enabled(feature.RuntimeSDK) {
		if err := r.callAfterHooks(ctx, s); err != nil {
			return err
		}
	}

	// Reconcile desired state of the InfrastructureCluster object.
	if err := r.reconcileInfrastructureCluster(ctx, s); err != nil {
		return err
	}

	// Reconcile desired state of the ControlPlane object.
	if err := r.reconcileControlPlane(ctx, s); err != nil {
		return err
	}

	// Reconcile desired state of the Cluster object.
	if err := r.reconcileCluster(ctx, s); err != nil {
		return err
	}

	// Reconcile desired state of the MachineDeployment objects.
	if err := r.reconcileMachineDeployments(ctx, s); err != nil {
		return err
	}

	// Reconcile desired state of the MachinePool objects and return.
	return r.reconcileMachinePools(ctx, s)
}

// Reconcile the Cluster shim, a temporary object used as a means to collect objects/templates
// that might be orphaned in case of errors during the remaining part of the reconcile process.
func (r *Reconciler) reconcileClusterShim(ctx context.Context, s *scope.Scope) error {
	shim := clusterShim(s.Current.Cluster)

	// If we are going to create the InfrastructureCluster or the ControlPlane object, then
	// add a temporary cluster-shim object and use it as an additional owner.
	// This will ensure the objects will be garbage collected in case of errors in between
	// creating InfrastructureCluster/ControlPlane objects and updating the Cluster with the
	// references to the above objects.
	if s.Current.InfrastructureCluster == nil || s.Current.ControlPlane.Object == nil {
		// Given that the cluster shim is a temporary object which is only modified
		// by this controller, it is not necessary to use the SSA patch helper.
		if err := r.Client.Create(ctx, shim); err != nil {
			if !apierrors.IsAlreadyExists(err) {
				return errors.Wrap(err, "failed to create the cluster shim object")
			}
			if err := r.Client.Get(ctx, client.ObjectKeyFromObject(shim), shim); err != nil {
				return errors.Wrapf(err, "failed to read the cluster shim object")
			}
		}

		// Enforce the type meta again, given that it gets blanked out by Get.
		shim.Kind = "Secret"
		shim.APIVersion = corev1.SchemeGroupVersion.String()

		// Add the shim as a temporary owner for the InfrastructureCluster.
		ownerRefs := s.Desired.InfrastructureCluster.GetOwnerReferences()
		ownerRefs = append(ownerRefs, *ownerReferenceTo(shim))
		s.Desired.InfrastructureCluster.SetOwnerReferences(ownerRefs)

		// Add the shim as a temporary owner for the ControlPlane.
		ownerRefs = s.Desired.ControlPlane.Object.GetOwnerReferences()
		ownerRefs = append(ownerRefs, *ownerReferenceTo(shim))
		s.Desired.ControlPlane.Object.SetOwnerReferences(ownerRefs)
	}

	// If the InfrastructureCluster and the ControlPlane objects have already been created
	// in a previous reconciliation, check if they have already been reconciled by the ClusterController
	// by verifying that the ownerReference for the Cluster is present.
	//
	// When the Cluster and the shim object are both owners,
	// it's safe for us to remove the shim and garbage collect any potential orphaned resource.
	if s.Current.InfrastructureCluster != nil && s.Current.ControlPlane.Object != nil {
		clusterOwnsAll := hasOwnerReferenceFrom(s.Current.InfrastructureCluster, s.Current.Cluster) &&
			hasOwnerReferenceFrom(s.Current.ControlPlane.Object, s.Current.Cluster)
		shimOwnsAtLeastOne := hasOwnerReferenceFrom(s.Current.InfrastructureCluster, shim) ||
			hasOwnerReferenceFrom(s.Current.ControlPlane.Object, shim)

		if clusterOwnsAll && shimOwnsAtLeastOne {
			if err := r.Client.Delete(ctx, shim); err != nil {
				if !apierrors.IsNotFound(err) {
					return errors.Wrapf(err, "failed to delete the cluster shim object")
				}
			}
		}
	}
	return nil
}

func clusterShim(c *clusterv1.Cluster) *corev1.Secret {
	shim := &corev1.Secret{
		TypeMeta: metav1.TypeMeta{
			Kind:       "Secret",
			APIVersion: corev1.SchemeGroupVersion.String(),
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-shim", c.Name),
			Namespace: c.Namespace,
			OwnerReferences: []metav1.OwnerReference{
				*ownerReferenceTo(c),
			},
		},
		Type: clusterv1.ClusterSecretType,
	}
	return shim
}

func hasOwnerReferenceFrom(obj, owner client.Object) bool {
	for _, o := range obj.GetOwnerReferences() {
		if o.Kind == owner.GetObjectKind().GroupVersionKind().Kind && o.Name == owner.GetName() {
			return true
		}
	}
	return false
}

func getOwnerReferenceFrom(obj, owner client.Object) *metav1.OwnerReference {
	for _, o := range obj.GetOwnerReferences() {
		if o.Kind == owner.GetObjectKind().GroupVersionKind().Kind && o.Name == owner.GetName() {
			return &o
		}
	}
	return nil
}

func (r *Reconciler) callAfterHooks(ctx context.Context, s *scope.Scope) error {
	if err := r.callAfterControlPlaneInitialized(ctx, s); err != nil {
		return err
	}

	return r.callAfterClusterUpgrade(ctx, s)
}

func (r *Reconciler) callAfterControlPlaneInitialized(ctx context.Context, s *scope.Scope) error {
	// If the cluster topology is being created, then track the intent to call the AfterControlPlaneInitialized hook so that we can call it later.
	if s.Current.Cluster.Spec.InfrastructureRef == nil && s.Current.Cluster.Spec.ControlPlaneRef == nil {
		if err := hooks.MarkAsPending(ctx, r.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneInitialized); err != nil {
			return err
		}
	}

	// Call the hook only if we are tracking the intent to do so. If it is not tracked it means we don't need to call the
	// hook because we already called the hook after the control plane was initialized.
	if hooks.IsPending(runtimehooksv1.AfterControlPlaneInitialized, s.Current.Cluster) {
		if isControlPlaneInitialized(s.Current.Cluster) {
			// The control plane is initialized for the first time. Call all the registered extensions for the hook.
			hookRequest := &runtimehooksv1.AfterControlPlaneInitializedRequest{
				Cluster: *s.Current.Cluster,
			}
			hookResponse := &runtimehooksv1.AfterControlPlaneInitializedResponse{}
			if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.AfterControlPlaneInitialized, s.Current.Cluster, hookRequest, hookResponse); err != nil {
				return err
			}
			s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneInitialized, hookResponse)
			if err := hooks.MarkAsDone(ctx, r.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneInitialized); err != nil {
				return err
			}
		}
	}

	return nil
}

func isControlPlaneInitialized(cluster *clusterv1.Cluster) bool {
	for _, condition := range cluster.GetConditions() {
		if condition.Type == clusterv1.ControlPlaneInitializedCondition {
			if condition.Status == corev1.ConditionTrue {
				return true
			}
		}
	}
	return false
}

func (r *Reconciler) callAfterClusterUpgrade(ctx context.Context, s *scope.Scope) error {
	// Call the hook only if we are tracking the intent to do so. If it is not tracked it means we don't need to call the
	// hook because we didn't go through an upgrade or we already called the hook after the upgrade.
	if hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster) {
		// Call the registered extensions for the hook after the cluster is fully upgraded.
		// A cluster is considered fully upgraded if:
		// - Control plane is stable (not upgrading, not scaling, not about to upgrade)
		// - MachineDeployments/MachinePools are not currently upgrading
		// - MachineDeployments/MachinePools are not pending an upgrade
		// - MachineDeployments/MachinePools are not pending create
		if isControlPlaneStable(s) && // Control Plane stable checks
			len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) == 0 && // Machine deployments are not upgrading or not about to upgrade
			!s.UpgradeTracker.MachineDeployments.IsAnyPendingCreate() && // No MachineDeployments are pending create
			!s.UpgradeTracker.MachineDeployments.IsAnyPendingUpgrade() && // No MachineDeployments are pending an upgrade
			!s.UpgradeTracker.MachineDeployments.DeferredUpgrade() && // No MachineDeployments have deferred an upgrade
			len(s.UpgradeTracker.MachinePools.UpgradingNames()) == 0 && // Machine pools are not upgrading or not about to upgrade
			!s.UpgradeTracker.MachinePools.IsAnyPendingCreate() && // No MachinePools are pending create
			!s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade() && // No MachinePools are pending an upgrade
			!s.UpgradeTracker.MachinePools.DeferredUpgrade() { // No MachinePools have deferred an upgrade
			// Everything is stable and the cluster can be considered fully upgraded.
			hookRequest := &runtimehooksv1.AfterClusterUpgradeRequest{
				Cluster:           *s.Current.Cluster,
				KubernetesVersion: s.Current.Cluster.Spec.Topology.Version,
			}
			hookResponse := &runtimehooksv1.AfterClusterUpgradeResponse{}
			if err := r.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster, hookRequest, hookResponse); err != nil {
				return err
			}
			s.HookResponseTracker.Add(runtimehooksv1.AfterClusterUpgrade, hookResponse)
			// The hook is successfully called; we can remove this hook from the list of pending-hooks.
			if err := hooks.MarkAsDone(ctx, r.Client, s.Current.Cluster, runtimehooksv1.AfterClusterUpgrade); err != nil {
				return err
			}
		}
	}

	return nil
}

// reconcileInfrastructureCluster reconciles the desired state of the InfrastructureCluster object.
func (r *Reconciler) reconcileInfrastructureCluster(ctx context.Context, s *scope.Scope) error {
	ctx, _ = tlog.LoggerFrom(ctx).WithObject(s.Desired.InfrastructureCluster).Into(ctx)

	ignorePaths, err := contract.InfrastructureCluster().IgnorePaths(s.Desired.InfrastructureCluster)
	if err != nil {
		return errors.Wrap(err, "failed to calculate ignore paths")
	}

	return r.reconcileReferencedObject(ctx, reconcileReferencedObjectInput{
		cluster:     s.Current.Cluster,
		current:     s.Current.InfrastructureCluster,
		desired:     s.Desired.InfrastructureCluster,
		ignorePaths: ignorePaths,
	})
}

// reconcileControlPlane works to bring the current state of a managed topology in line with the desired state. This involves
// updating the cluster where needed.
func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) error {
	// If the ControlPlane has defined a current or desired MachineHealthCheck attempt to reconcile it.
	// MHC changes are not Kubernetes version dependent, therefore proceed with MHC reconciliation
	// even if the Control Plane is pending an upgrade.
	if s.Desired.ControlPlane.MachineHealthCheck != nil || s.Current.ControlPlane.MachineHealthCheck != nil {
		// Reconcile the current and desired state of the MachineHealthCheck.
		if err := r.reconcileMachineHealthCheck(ctx, s.Current.ControlPlane.MachineHealthCheck, s.Desired.ControlPlane.MachineHealthCheck); err != nil {
			return err
		}
	}

	// Return early if the control plane is pending an upgrade.
	// Do not reconcile the control plane yet to avoid updating the control plane while it is still pending a
	// version upgrade. This will prevent the control plane from performing a double rollout.
	if s.UpgradeTracker.ControlPlane.IsPendingUpgrade {
		return nil
	}
	// If the clusterClass mandates the controlPlane has infrastructureMachines, reconcile it.
	if s.Blueprint.HasControlPlaneInfrastructureMachine() {
		ctx, _ := tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.InfrastructureMachineTemplate).Into(ctx)

		cpInfraRef, err := contract.ControlPlane().MachineTemplate().InfrastructureRef().Get(s.Desired.ControlPlane.Object)
		if err != nil {
			return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.InfrastructureMachineTemplate})
		}

		// Create or update the MachineInfrastructureTemplate of the control plane.
		if err = r.reconcileReferencedTemplate(ctx, reconcileReferencedTemplateInput{
			cluster:              s.Current.Cluster,
			ref:                  cpInfraRef,
			current:              s.Current.ControlPlane.InfrastructureMachineTemplate,
			desired:              s.Desired.ControlPlane.InfrastructureMachineTemplate,
			compatibilityChecker: check.ObjectsAreCompatible,
			templateNamePrefix:   controlPlaneInfrastructureMachineTemplateNamePrefix(s.Current.Cluster.Name),
		},
		); err != nil {
			return err
		}

		// The controlPlaneObject.Spec.machineTemplate.infrastructureRef has to be updated in the desired object.
		err = contract.ControlPlane().MachineTemplate().InfrastructureRef().Set(s.Desired.ControlPlane.Object, refToUnstructured(cpInfraRef))
		if err != nil {
			return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: s.Desired.ControlPlane.Object})
		}
	}

	// Create or update the ControlPlaneObject for the ControlPlaneState.
	ctx, _ = tlog.LoggerFrom(ctx).WithObject(s.Desired.ControlPlane.Object).Into(ctx)
	if err := r.reconcileReferencedObject(ctx, reconcileReferencedObjectInput{
		cluster:       s.Current.Cluster,
		current:       s.Current.ControlPlane.Object,
		desired:       s.Desired.ControlPlane.Object,
		versionGetter: contract.ControlPlane().Version().Get,
	}); err != nil {
		return err
	}

	// If the controlPlane has infrastructureMachines and the InfrastructureMachineTemplate has changed on this reconcile,
	// delete the old template.
	// This is a best effort deletion only and may leak templates if an error occurs during reconciliation.
	if s.Blueprint.HasControlPlaneInfrastructureMachine() && s.Current.ControlPlane.InfrastructureMachineTemplate != nil {
		if s.Current.ControlPlane.InfrastructureMachineTemplate.GetName() != s.Desired.ControlPlane.InfrastructureMachineTemplate.GetName() {
			if err := r.Client.Delete(ctx, s.Current.ControlPlane.InfrastructureMachineTemplate); err != nil {
				return errors.Wrapf(err, "failed to delete old infrastructure machine template %s of control plane %s",
					tlog.KObj{Obj: s.Current.ControlPlane.InfrastructureMachineTemplate},
					tlog.KObj{Obj: s.Current.ControlPlane.Object},
				)
			}
		}
	}

	return nil
}

// reconcileMachineHealthCheck creates, updates, deletes or leaves untouched a MachineHealthCheck depending on the difference between the
// current state and the desired state.
func (r *Reconciler) reconcileMachineHealthCheck(ctx context.Context, current, desired *clusterv1.MachineHealthCheck) error {
	log := tlog.LoggerFrom(ctx)

	// If a current MachineHealthCheck doesn't exist but there is a desired MachineHealthCheck, attempt to create it.
	if current == nil && desired != nil {
		log.Infof("Creating %s", tlog.KObj{Obj: desired})
		helper, err := r.patchHelperFactory(ctx, nil, desired)
		if err != nil {
			return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: desired})
		}
		if err := helper.Patch(ctx); err != nil {
			return errors.Wrapf(err, "failed to create %s", tlog.KObj{Obj: desired})
		}
		r.recorder.Eventf(desired, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: desired})
		return nil
	}

	// If a current MachineHealthCheck exists but there is no desired MachineHealthCheck, attempt to delete it.
	if current != nil && desired == nil {
		log.Infof("Deleting %s", tlog.KObj{Obj: current})
		if err := r.Client.Delete(ctx, current); err != nil {
			// If the object to be deleted is not found don't throw an error.
			if !apierrors.IsNotFound(err) {
				return errors.Wrapf(err, "failed to delete %s", tlog.KObj{Obj: current})
			}
		}
		r.recorder.Eventf(current, corev1.EventTypeNormal, deleteEventReason, "Deleted %q", tlog.KObj{Obj: current})
		return nil
	}

	ctx, log = log.WithObject(current).Into(ctx)

	// Check differences between current and desired MachineHealthChecks, and patch if required.
	// NOTE: we want to be authoritative on the entire spec because the users are
	// expected to change MHC fields from the ClusterClass only.
	patchHelper, err := r.patchHelperFactory(ctx, current, desired)
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: current})
	}
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: current})
		return nil
	}

	log.Infof("Patching %s", tlog.KObj{Obj: current})
	if err := patchHelper.Patch(ctx); err != nil {
		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: current})
	}
	r.recorder.Eventf(current, corev1.EventTypeNormal, updateEventReason, "Updated %q", tlog.KObj{Obj: current})
	return nil
}

// reconcileCluster reconciles the desired state of the Cluster object.
// NOTE: this assumes that reconcileInfrastructureCluster and reconcileControlPlane have already completed;
// more specifically, after a Cluster is created it is assumed that the reference to the InfrastructureCluster /
// ControlPlane objects should never change (only the content of the objects can change).
func (r *Reconciler) reconcileCluster(ctx context.Context, s *scope.Scope) error {
	ctx, log := tlog.LoggerFrom(ctx).WithObject(s.Desired.Cluster).Into(ctx)

	// Check differences between current and desired state and, if required, patch the current object.
	patchHelper, err := r.patchHelperFactory(ctx, s.Current.Cluster, s.Desired.Cluster)
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: s.Current.Cluster})
	}
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: s.Current.Cluster})
		return nil
	}

	log.Infof("Patching %s", tlog.KObj{Obj: s.Current.Cluster})
	if err := patchHelper.Patch(ctx); err != nil {
		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: s.Current.Cluster})
	}
	r.recorder.Eventf(s.Current.Cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q", tlog.KObj{Obj: s.Current.Cluster})

	// Wait until the Cluster is updated in the cache.
	// Note: We have to do this because otherwise using a cached client in the Reconcile func could
	// return a stale state of the Cluster we just patched (because the cache might be stale).
	// Note: It is good enough to check that the resource version changed. Other controllers might have updated the
	// Cluster as well, but the combination of the patch call above without a conflict and a changed resource
	// version here guarantees that we see the changes of our own update.
	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
		key := client.ObjectKey{Namespace: s.Current.Cluster.GetNamespace(), Name: s.Current.Cluster.GetName()}
		cachedCluster := &clusterv1.Cluster{}
		if err := r.Client.Get(ctx, key, cachedCluster); err != nil {
			return false, err
		}
		return s.Current.Cluster.GetResourceVersion() != cachedCluster.GetResourceVersion(), nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed waiting for Cluster %s to be updated in the cache after patch", tlog.KObj{Obj: s.Current.Cluster})
	}
	return nil
}

// reconcileMachineDeployments reconciles the desired state of the MachineDeployment objects.
func (r *Reconciler) reconcileMachineDeployments(ctx context.Context, s *scope.Scope) error {
	diff := calculateMachineDeploymentDiff(s.Current.MachineDeployments, s.Desired.MachineDeployments)

	// Create MachineDeployments.
	if len(diff.toCreate) > 0 {
		// In current state we only got the MD list via a cached call.
		// As a consequence, in order to prevent the creation of duplicate MD due to stale reads,
		// we are now using a live client to double-check here that the MachineDeployment
		// to be created doesn't exist yet.
		currentMDTopologyNames, err := r.getCurrentMachineDeployments(ctx, s)
		if err != nil {
			return err
		}
		for _, mdTopologyName := range diff.toCreate {
			md := s.Desired.MachineDeployments[mdTopologyName]

			// Skip the MD creation if the MD already exists.
			if currentMDTopologyNames.Has(mdTopologyName) {
				log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object)
				log.V(3).Infof(fmt.Sprintf("Skipping creation of MachineDeployment %s because MachineDeployment for topology %s already exists (only considered creation because of stale cache)", tlog.KObj{Obj: md.Object}, mdTopologyName))
				continue
			}

			if err := r.createMachineDeployment(ctx, s, md); err != nil {
				return err
			}
		}
	}

	// Update MachineDeployments.
	for _, mdTopologyName := range diff.toUpdate {
		currentMD := s.Current.MachineDeployments[mdTopologyName]
		desiredMD := s.Desired.MachineDeployments[mdTopologyName]
		if err := r.updateMachineDeployment(ctx, s, mdTopologyName, currentMD, desiredMD); err != nil {
			return err
		}
	}

	// Delete MachineDeployments.
	for _, mdTopologyName := range diff.toDelete {
		md := s.Current.MachineDeployments[mdTopologyName]
		if err := r.deleteMachineDeployment(ctx, s.Current.Cluster, md); err != nil {
			return err
		}
	}
	return nil
}

// getCurrentMachineDeployments gets the current list of MachineDeployments via the APIReader.
func (r *Reconciler) getCurrentMachineDeployments(ctx context.Context, s *scope.Scope) (sets.Set[string], error) {
	// TODO: We should consider using PartialObjectMetadataList here. Currently this doesn't work as our
	// implementation for topology dryrun doesn't support PartialObjectMetadataList.
	mdList := &clusterv1.MachineDeploymentList{}
	err := r.APIReader.List(ctx, mdList,
		client.MatchingLabels{
			clusterv1.ClusterNameLabel:          s.Current.Cluster.Name,
			clusterv1.ClusterTopologyOwnedLabel: "",
		},
		client.InNamespace(s.Current.Cluster.Namespace),
	)
	if err != nil {
		return nil, errors.Wrap(err, "failed to read MachineDeployments for managed topology")
	}

	currentMDs := sets.Set[string]{}
	for _, md := range mdList.Items {
		mdTopologyName, ok := md.ObjectMeta.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel]
		if ok || mdTopologyName != "" {
			currentMDs.Insert(mdTopologyName)
		}
	}
	return currentMDs, nil
}

// createMachineDeployment creates a MachineDeployment and the corresponding Templates.
func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope, md *scope.MachineDeploymentState) error {
	// Do not create the MachineDeployment if it is marked as pending create.
	// This will also block MHC creation because creating the MHC without the corresponding
	// MachineDeployment is unnecessary.
	mdTopologyName, ok := md.Object.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel]
	if !ok || mdTopologyName == "" {
		// Note: This is only an additional safety check and should not happen. The label will always be added when computing
		// the desired MachineDeployment.
		return errors.Errorf("new MachineDeployment is missing the %q label", clusterv1.ClusterTopologyMachineDeploymentNameLabel)
	}
	// Return early if the MachineDeployment is pending create.
	if s.UpgradeTracker.MachineDeployments.IsPendingCreate(mdTopologyName) {
		return nil
	}

	log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object)
	cluster := s.Current.Cluster
	infraCtx, _ := log.WithObject(md.InfrastructureMachineTemplate).Into(ctx)
	if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
		cluster: cluster,
		desired: md.InfrastructureMachineTemplate,
	}); err != nil {
		return errors.Wrapf(err, "failed to create %s", md.Object.Kind)
	}

	bootstrapCtx, _ := log.WithObject(md.BootstrapTemplate).Into(ctx)
	if err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
		cluster: cluster,
		desired: md.BootstrapTemplate,
	}); err != nil {
		return errors.Wrapf(err, "failed to create %s", md.Object.Kind)
	}

	log = log.WithObject(md.Object)
	log.Infof(fmt.Sprintf("Creating %s", tlog.KObj{Obj: md.Object}))
	helper, err := r.patchHelperFactory(ctx, nil, md.Object)
	if err != nil {
		return createErrorWithoutObjectName(ctx, err, md.Object)
	}
	if err := helper.Patch(ctx); err != nil {
		return createErrorWithoutObjectName(ctx, err, md.Object)
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: md.Object})

	// Wait until MachineDeployment is visible in the cache.
	// Note: We have to do this because otherwise using a cached client in current state could
	// miss a newly created MachineDeployment (because the cache might be stale).
	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
		key := client.ObjectKey{Namespace: md.Object.Namespace, Name: md.Object.Name}
		if err := r.Client.Get(ctx, key, &clusterv1.MachineDeployment{}); err != nil {
			if apierrors.IsNotFound(err) {
				return false, nil
			}
			return false, err
		}
		return true, nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed waiting for MachineDeployment %s to be visible in the cache after create", md.Object.Kind)
	}

	// If the MachineDeployment has defined a MachineHealthCheck reconcile it.
	if md.MachineHealthCheck != nil {
		if err := r.reconcileMachineHealthCheck(ctx, nil, md.MachineHealthCheck); err != nil {
			return err
		}
	}
	return nil
}

// updateMachineDeployment updates a MachineDeployment. Also rotates the corresponding Templates if necessary.
func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope, mdTopologyName string, currentMD, desiredMD *scope.MachineDeploymentState) error {
	log := tlog.LoggerFrom(ctx).WithMachineDeployment(desiredMD.Object)

	// Patch the MachineHealthCheck for the MachineDeployment.
	// MHC changes are not Kubernetes version dependent, therefore proceed with MHC reconciliation
	// even if the MachineDeployment is pending an upgrade.
	if desiredMD.MachineHealthCheck != nil || currentMD.MachineHealthCheck != nil {
		if err := r.reconcileMachineHealthCheck(ctx, currentMD.MachineHealthCheck, desiredMD.MachineHealthCheck); err != nil {
			return err
		}
	}

	// Return early if the MachineDeployment is pending an upgrade.
	// Do not reconcile the MachineDeployment yet to avoid updating the MachineDeployment while it is still pending a
	// version upgrade. This will prevent the MachineDeployment from performing a double rollout.
	if s.UpgradeTracker.MachineDeployments.IsPendingUpgrade(currentMD.Object.Name) {
		return nil
	}

	cluster := s.Current.Cluster
	infraCtx, _ := log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx)
	if err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{
		cluster:              cluster,
		ref:                  &desiredMD.Object.Spec.Template.Spec.InfrastructureRef,
		current:              currentMD.InfrastructureMachineTemplate,
		desired:              desiredMD.InfrastructureMachineTemplate,
		templateNamePrefix:   infrastructureMachineTemplateNamePrefix(cluster.Name, mdTopologyName),
		compatibilityChecker: check.ObjectsAreCompatible,
	}); err != nil {
		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object})
	}

	bootstrapCtx, _ := log.WithObject(desiredMD.BootstrapTemplate).Into(ctx)
	if err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{
		cluster:              cluster,
		ref:                  desiredMD.Object.Spec.Template.Spec.Bootstrap.ConfigRef,
		current:              currentMD.BootstrapTemplate,
		desired:              desiredMD.BootstrapTemplate,
		templateNamePrefix:   bootstrapTemplateNamePrefix(cluster.Name, mdTopologyName),
		compatibilityChecker: check.ObjectsAreInTheSameNamespace,
	}); err != nil {
		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object})
	}

	// Check differences between the current and desired MachineDeployment and, if required, patch the current object.
	log = log.WithObject(desiredMD.Object)
	patchHelper, err := r.patchHelperFactory(ctx, currentMD.Object, desiredMD.Object)
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: currentMD.Object})
	}
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: currentMD.Object})
		return nil
	}

	log.Infof("Patching %s", tlog.KObj{Obj: currentMD.Object})
	if err := patchHelper.Patch(ctx); err != nil {
		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: currentMD.Object})
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: currentMD.Object}, logMachineDeploymentVersionChange(currentMD.Object, desiredMD.Object))

	// Wait until MachineDeployment is updated in the cache.
	// Note: We have to do this because otherwise using a cached client in current state could
	// return a stale state of a MachineDeployment we just patched (because the cache might be stale).
	// Note: It is good enough to check that the resource version changed. Other controllers might have updated the
	// MachineDeployment as well, but the combination of the patch call above without a conflict and a changed resource
	// version here guarantees that we see the changes of our own update.
	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
		key := client.ObjectKey{Namespace: currentMD.Object.GetNamespace(), Name: currentMD.Object.GetName()}
		cachedMD := &clusterv1.MachineDeployment{}
		if err := r.Client.Get(ctx, key, cachedMD); err != nil {
			return false, err
		}
		return currentMD.Object.GetResourceVersion() != cachedMD.GetResourceVersion(), nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed waiting for MachineDeployment %s to be updated in the cache after patch", tlog.KObj{Obj: currentMD.Object})
	}

	// We want to call both cleanup functions even if one of them fails to clean up as much as possible.
	return nil
}

func logMachineDeploymentVersionChange(current, desired *clusterv1.MachineDeployment) string {
	if current.Spec.Template.Spec.Version == nil || desired.Spec.Template.Spec.Version == nil {
		return ""
	}

	if *current.Spec.Template.Spec.Version != *desired.Spec.Template.Spec.Version {
		return fmt.Sprintf(" with version change from %s to %s", *current.Spec.Template.Spec.Version, *desired.Spec.Template.Spec.Version)
	}
	return ""
}

// deleteMachineDeployment deletes a MachineDeployment.
func (r *Reconciler) deleteMachineDeployment(ctx context.Context, cluster *clusterv1.Cluster, md *scope.MachineDeploymentState) error {
	log := tlog.LoggerFrom(ctx).WithMachineDeployment(md.Object).WithObject(md.Object)

	// Delete the MachineHealthCheck for the MachineDeployment.
	if md.MachineHealthCheck != nil {
		if err := r.reconcileMachineHealthCheck(ctx, md.MachineHealthCheck, nil); err != nil {
			return err
		}
	}
	log.Infof("Deleting %s", tlog.KObj{Obj: md.Object})
	if err := r.Client.Delete(ctx, md.Object); err != nil && !apierrors.IsNotFound(err) {
		return errors.Wrapf(err, "failed to delete %s", tlog.KObj{Obj: md.Object})
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, deleteEventReason, "Deleted %q", tlog.KObj{Obj: md.Object})
	return nil
}

// reconcileMachinePools reconciles the desired state of the MachinePool objects.
func (r *Reconciler) reconcileMachinePools(ctx context.Context, s *scope.Scope) error {
	diff := calculateMachinePoolDiff(s.Current.MachinePools, s.Desired.MachinePools)

	// Create MachinePools.
	if len(diff.toCreate) > 0 {
		// In current state we only got the MP list via a cached call.
		// As a consequence, in order to prevent the creation of duplicate MP due to stale reads,
		// we are now using a live client to double-check here that the MachinePool
		// to be created doesn't exist yet.
		currentMPTopologyNames, err := r.getCurrentMachinePools(ctx, s)
		if err != nil {
			return err
		}
		for _, mpTopologyName := range diff.toCreate {
			mp := s.Desired.MachinePools[mpTopologyName]

			// Skip the MP creation if the MP already exists.
			if currentMPTopologyNames.Has(mpTopologyName) {
				log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object)
				log.V(3).Infof(fmt.Sprintf("Skipping creation of MachinePool %s because MachinePool for topology %s already exists (only considered creation because of stale cache)", tlog.KObj{Obj: mp.Object}, mpTopologyName))
				continue
			}

			if err := r.createMachinePool(ctx, s, mp); err != nil {
				return err
			}
		}
	}

	// Update MachinePools.
	for _, mpTopologyName := range diff.toUpdate {
		currentMP := s.Current.MachinePools[mpTopologyName]
		desiredMP := s.Desired.MachinePools[mpTopologyName]
		if err := r.updateMachinePool(ctx, s, currentMP, desiredMP); err != nil {
			return err
		}
	}

	// Delete MachinePools.
	for _, mpTopologyName := range diff.toDelete {
		mp := s.Current.MachinePools[mpTopologyName]
		if err := r.deleteMachinePool(ctx, s.Current.Cluster, mp); err != nil {
			return err
		}
	}

	return nil
}

// getCurrentMachinePools gets the current list of MachinePools via the APIReader.
func (r *Reconciler) getCurrentMachinePools(ctx context.Context, s *scope.Scope) (sets.Set[string], error) {
	// TODO: We should consider using PartialObjectMetadataList here. Currently this doesn't work as our
	// implementation for topology dryrun doesn't support PartialObjectMetadataList.
	mpList := &expv1.MachinePoolList{}
	err := r.APIReader.List(ctx, mpList,
		client.MatchingLabels{
			clusterv1.ClusterNameLabel:          s.Current.Cluster.Name,
			clusterv1.ClusterTopologyOwnedLabel: "",
		},
		client.InNamespace(s.Current.Cluster.Namespace),
	)
	if err != nil {
		return nil, errors.Wrap(err, "failed to read MachinePools for managed topology")
	}

	currentMPs := sets.Set[string]{}
	for _, mp := range mpList.Items {
		mpTopologyName, ok := mp.ObjectMeta.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel]
		if ok || mpTopologyName != "" {
			currentMPs.Insert(mpTopologyName)
		}
	}
	return currentMPs, nil
}

// createMachinePool creates a MachinePool and the corresponding templates.
func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp *scope.MachinePoolState) error {
	// Do not create the MachinePool if it is marked as pending create.
	mpTopologyName, ok := mp.Object.Labels[clusterv1.ClusterTopologyMachinePoolNameLabel]
	if !ok || mpTopologyName == "" {
		// Note: This is only an additional safety check and should not happen. The label will always be added when computing
		// the desired MachinePool.
		return errors.Errorf("new MachinePool is missing the %q label", clusterv1.ClusterTopologyMachinePoolNameLabel)
	}
	// Return early if the MachinePool is pending create.
	if s.UpgradeTracker.MachinePools.IsPendingCreate(mpTopologyName) {
		return nil
	}

	log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object)
	cluster := s.Current.Cluster
	infraCtx, _ := log.WithObject(mp.InfrastructureMachinePoolObject).Into(ctx)
	if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
		cluster: cluster,
		desired: mp.InfrastructureMachinePoolObject,
	}); err != nil {
		return errors.Wrapf(err, "failed to create %s", mp.Object.Kind)
	}

	bootstrapCtx, _ := log.WithObject(mp.BootstrapObject).Into(ctx)
	if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
		cluster: cluster,
		desired: mp.BootstrapObject,
	}); err != nil {
		return errors.Wrapf(err, "failed to create %s", mp.Object.Kind)
	}

	log = log.WithObject(mp.Object)
	log.Infof(fmt.Sprintf("Creating %s", tlog.KObj{Obj: mp.Object}))
	helper, err := r.patchHelperFactory(ctx, nil, mp.Object)
	if err != nil {
		return createErrorWithoutObjectName(ctx, err, mp.Object)
	}
	if err := helper.Patch(ctx); err != nil {
		return createErrorWithoutObjectName(ctx, err, mp.Object)
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: mp.Object})

	// Wait until MachinePool is visible in the cache.
	// Note: We have to do this because otherwise using a cached client in current state could
	// miss a newly created MachinePool (because the cache might be stale).
	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
		key := client.ObjectKey{Namespace: mp.Object.Namespace, Name: mp.Object.Name}
		if err := r.Client.Get(ctx, key, &expv1.MachinePool{}); err != nil {
			if apierrors.IsNotFound(err) {
				return false, nil
			}
			return false, err
		}
		return true, nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed waiting for MachinePool %s to be visible in the cache after create", mp.Object.Kind)
	}

	return nil
}

// updateMachinePool updates a MachinePool. Also updates the corresponding objects if necessary.
func (r *Reconciler) updateMachinePool(ctx context.Context, s *scope.Scope, currentMP, desiredMP *scope.MachinePoolState) error {
	log := tlog.LoggerFrom(ctx).WithMachinePool(desiredMP.Object)

	// Return early if the MachinePool is pending an upgrade.
	// Do not reconcile the MachinePool yet to avoid updating the MachinePool while it is still pending a
	// version upgrade. This will prevent the MachinePool from performing a double rollout.
	if s.UpgradeTracker.MachinePools.IsPendingUpgrade(currentMP.Object.Name) {
		return nil
	}

	cluster := s.Current.Cluster
	infraCtx, _ := log.WithObject(desiredMP.InfrastructureMachinePoolObject).Into(ctx)
	if err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{
		cluster: cluster,
		current: currentMP.InfrastructureMachinePoolObject,
		desired: desiredMP.InfrastructureMachinePoolObject,
	}); err != nil {
		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMP.Object})
	}

	bootstrapCtx, _ := log.WithObject(desiredMP.BootstrapObject).Into(ctx)
	if err := r.reconcileReferencedObject(bootstrapCtx, reconcileReferencedObjectInput{
		cluster: cluster,
		current: currentMP.BootstrapObject,
		desired: desiredMP.BootstrapObject,
	}); err != nil {
		return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMP.Object})
	}

	// Check differences between the current and desired MachinePool and, if required, patch the current object.
	log = log.WithObject(desiredMP.Object)
	patchHelper, err := r.patchHelperFactory(ctx, currentMP.Object, desiredMP.Object)
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: currentMP.Object})
	}
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: currentMP.Object})
		return nil
	}

	log.Infof("Patching %s", tlog.KObj{Obj: currentMP.Object})
	if err := patchHelper.Patch(ctx); err != nil {
		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: currentMP.Object})
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: currentMP.Object}, logMachinePoolVersionChange(currentMP.Object, desiredMP.Object))

	// Wait until MachinePool is updated in the cache.
	// Note: We have to do this because otherwise using a cached client in current state could
	// return a stale state of a MachinePool we just patched (because the cache might be stale).
	// Note: It is good enough to check that the resource version changed. Other controllers might have updated the
	// MachinePool as well, but the combination of the patch call above without a conflict and a changed resource
	// version here guarantees that we see the changes of our own update.
	err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) {
		key := client.ObjectKey{Namespace: currentMP.Object.GetNamespace(), Name: currentMP.Object.GetName()}
		cachedMP := &expv1.MachinePool{}
		if err := r.Client.Get(ctx, key, cachedMP); err != nil {
			return false, err
		}
		return currentMP.Object.GetResourceVersion() != cachedMP.GetResourceVersion(), nil
	})
	if err != nil {
		return errors.Wrapf(err, "failed waiting for MachinePool %s to be updated in the cache after patch", tlog.KObj{Obj: currentMP.Object})
	}

	// We want to call both cleanup functions even if one of them fails to clean up as much as possible.
	return nil
}

func logMachinePoolVersionChange(current, desired *expv1.MachinePool) string {
	if current.Spec.Template.Spec.Version == nil || desired.Spec.Template.Spec.Version == nil {
		return ""
	}

	if *current.Spec.Template.Spec.Version != *desired.Spec.Template.Spec.Version {
		return fmt.Sprintf(" with version change from %s to %s", *current.Spec.Template.Spec.Version, *desired.Spec.Template.Spec.Version)
	}
	return ""
}

// deleteMachinePool deletes a MachinePool.
func (r *Reconciler) deleteMachinePool(ctx context.Context, cluster *clusterv1.Cluster, mp *scope.MachinePoolState) error {
	log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object).WithObject(mp.Object)
	log.Infof("Deleting %s", tlog.KObj{Obj: mp.Object})
	if err := r.Client.Delete(ctx, mp.Object); err != nil && !apierrors.IsNotFound(err) {
		return errors.Wrapf(err, "failed to delete %s", tlog.KObj{Obj: mp.Object})
	}
	r.recorder.Eventf(cluster, corev1.EventTypeNormal, deleteEventReason, "Deleted %q", tlog.KObj{Obj: mp.Object})
	return nil
}

type machineDiff struct {
	toCreate, toUpdate, toDelete []string
}

// calculateMachineDeploymentDiff compares two maps of MachineDeploymentState and calculates which
// MachineDeployments should be created, updated or deleted.
func calculateMachineDeploymentDiff(current, desired map[string]*scope.MachineDeploymentState) machineDiff {
	var diff machineDiff

	for md := range desired {
		if _, ok := current[md]; ok {
			diff.toUpdate = append(diff.toUpdate, md)
		} else {
			diff.toCreate = append(diff.toCreate, md)
		}
	}

	for md := range current {
		if _, ok := desired[md]; !ok {
			diff.toDelete = append(diff.toDelete, md)
		}
	}

	return diff
}

// calculateMachinePoolDiff compares two maps of MachinePoolState and calculates which
// MachinePools should be created, updated or deleted.
func calculateMachinePoolDiff(current, desired map[string]*scope.MachinePoolState) machineDiff {
	var diff machineDiff

	for mp := range desired {
		if _, ok := current[mp]; ok {
			diff.toUpdate = append(diff.toUpdate, mp)
		} else {
			diff.toCreate = append(diff.toCreate, mp)
		}
	}

	for mp := range current {
		if _, ok := desired[mp]; !ok {
			diff.toDelete = append(diff.toDelete, mp)
		}
	}

	return diff
}

type unstructuredVersionGetter func(obj *unstructured.Unstructured) (*string, error)

type reconcileReferencedObjectInput struct {
	cluster       *clusterv1.Cluster
	current       *unstructured.Unstructured
	desired       *unstructured.Unstructured
	versionGetter unstructuredVersionGetter
	ignorePaths   []contract.Path
}

// reconcileReferencedObject reconciles the desired state of the referenced object.
// NOTE: After a referenced object is created it is assumed that the reference should
// never change (only the content of the object can change over time). Thus, we are checking for strict compatibility.
func (r *Reconciler) reconcileReferencedObject(ctx context.Context, in reconcileReferencedObjectInput) error {
	log := tlog.LoggerFrom(ctx)

	// If there is no current object, create it.
	if in.current == nil {
		log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
		helper, err := r.patchHelperFactory(ctx, nil, in.desired, structuredmerge.IgnorePaths(in.ignorePaths))
		if err != nil {
			return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
		}
		if err := helper.Patch(ctx); err != nil {
			return createErrorWithoutObjectName(ctx, err, in.desired)
		}
		r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: in.desired})
		return nil
	}

	// Check if the current and desired referenced object are compatible.
	if allErrs := check.ObjectsAreStrictlyCompatible(in.current, in.desired); len(allErrs) > 0 {
		return allErrs.ToAggregate()
	}

	// Check differences between current and desired state and, if required, patch the current object.
	patchHelper, err := r.patchHelperFactory(ctx, in.current, in.desired, structuredmerge.IgnorePaths(in.ignorePaths))
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
	}
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: in.desired})
		return nil
	}

	log.Infof("Patching %s", tlog.KObj{Obj: in.desired})
	if err := patchHelper.Patch(ctx); err != nil {
		return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.current})
	}
	r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: in.desired}, logUnstructuredVersionChange(in.current, in.desired, in.versionGetter))
	return nil
}

func logUnstructuredVersionChange(current, desired *unstructured.Unstructured, versionGetter unstructuredVersionGetter) string {
	if versionGetter == nil {
		return ""
	}

	currentVersion, err := versionGetter(current)
	if err != nil || currentVersion == nil {
		return ""
	}
	desiredVersion, err := versionGetter(desired)
	if err != nil || desiredVersion == nil {
		return ""
	}

	if *currentVersion != *desiredVersion {
		return fmt.Sprintf(" with version change from %s to %s", *currentVersion, *desiredVersion)
	}
	return ""
}

type reconcileReferencedTemplateInput struct {
	cluster              *clusterv1.Cluster
	ref                  *corev1.ObjectReference
	current              *unstructured.Unstructured
	desired              *unstructured.Unstructured
	templateNamePrefix   string
	compatibilityChecker func(current, desired client.Object) field.ErrorList
}

// reconcileReferencedTemplate reconciles the desired state of a referenced Template.
// NOTE: According to Cluster API operational practices, when a referenced Template changes a template rotation is required:
// 1. create a new Template
// 2. update the reference
// 3. delete the old Template
// This function specifically takes care of the first step and updates the reference locally, so that the remaining steps
// can be executed afterwards.
// NOTE: This func has a side effect in case of template rotation, changing both the desired object and the object reference.
func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconcileReferencedTemplateInput) error {
	log := tlog.LoggerFrom(ctx)

	// If there is no current object, create the desired object.
	if in.current == nil {
		log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
		helper, err := r.patchHelperFactory(ctx, nil, in.desired)
		if err != nil {
			return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
		}
		if err := helper.Patch(ctx); err != nil {
			return createErrorWithoutObjectName(ctx, err, in.desired)
		}
		r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q", tlog.KObj{Obj: in.desired})
		return nil
	}

	if in.ref == nil {
		return errors.Errorf("failed to rotate %s: ref should not be nil", in.desired.GroupVersionKind())
	}

	// Check if the current and desired referenced object are compatible.
	if allErrs := in.compatibilityChecker(in.current, in.desired); len(allErrs) > 0 {
		return allErrs.ToAggregate()
	}

	// Check differences between the current and desired objects, and if there are changes start the template rotation.
	patchHelper, err := r.patchHelperFactory(ctx, in.current, in.desired)
	if err != nil {
		return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: in.current})
	}

	// Return if no changes are detected.
	if !patchHelper.HasChanges() {
		log.V(3).Infof("No changes for %s", tlog.KObj{Obj: in.desired})
		return nil
	}

	// If there are no changes in the spec, and thus only changes in metadata, instead of doing a full template
	// rotation we patch the object in place. This avoids recreating machines.
	if !patchHelper.HasSpecChanges() {
		log.Infof("Patching %s", tlog.KObj{Obj: in.desired})
		if err := patchHelper.Patch(ctx); err != nil {
			return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: in.desired})
		}
		r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q (metadata changes)", tlog.KObj{Obj: in.desired})
		return nil
	}

	// Create the new template.

	// NOTE: it is required to assign a new name, because when computing the desired state the object name is enforced to be equal to the current one.
	// TODO: find a way to make the side effect more explicit.
	newName := names.SimpleNameGenerator.GenerateName(in.templateNamePrefix)
	in.desired.SetName(newName)

	log.Infof("Rotating %s, new name %s", tlog.KObj{Obj: in.current}, newName)
	log.Infof("Creating %s", tlog.KObj{Obj: in.desired})
	helper, err := r.patchHelperFactory(ctx, nil, in.desired)
	if err != nil {
		return errors.Wrap(createErrorWithoutObjectName(ctx, err, in.desired), "failed to create patch helper")
	}
	if err := helper.Patch(ctx); err != nil {
		return createErrorWithoutObjectName(ctx, err, in.desired)
	}
	r.recorder.Eventf(in.cluster, corev1.EventTypeNormal, createEventReason, "Created %q as a replacement for %q (template rotation)", tlog.KObj{Obj: in.desired}, in.ref.Name)

	// Update the reference with the new name.
	// NOTE: Updating the object hosting the reference to the template is executed outside this func.
	// TODO: find a way to make the side effect more explicit.
	in.ref.Name = newName

	return nil
}

// createErrorWithoutObjectName removes the name of the object from the error message. As each new Create call involves an
// object with a unique generated name, each error appears to be a different error. As the errors are being surfaced in a condition
// on the Cluster, the name is removed here to prevent each creation error from triggering a new reconciliation.
func createErrorWithoutObjectName(ctx context.Context, err error, obj client.Object) error {
	log := ctrl.LoggerFrom(ctx)
	if obj != nil {
		log = log.WithValues(obj.GetObjectKind().GroupVersionKind().Kind, klog.KObj(obj))
	}
	log.Error(err, "Failed to create object")

	var statusError *apierrors.StatusError
	if errors.As(err, &statusError) {
		var msg string
		if statusError.Status().Details != nil {
			var causes []string
			for _, cause := range statusError.Status().Details.Causes {
				causes = append(causes, fmt.Sprintf("%s: %s: %s", cause.Type, cause.Field, cause.Message))
			}
			if len(causes) > 0 {
				msg = fmt.Sprintf("failed to create %s.%s: %s", statusError.Status().Details.Kind, statusError.Status().Details.Group, strings.Join(causes, " "))
			} else {
				msg = fmt.Sprintf("failed to create %s.%s", statusError.Status().Details.Kind, statusError.Status().Details.Group)
			}
			statusError.ErrStatus.Message = msg
			return statusError
		}

		if statusError.Status().Message != "" {
			if obj != nil {
				msg = fmt.Sprintf("failed to create %s", obj.GetObjectKind().GroupVersionKind().GroupKind().String())
			} else {
				msg = "failed to create object"
			}
		}
		statusError.ErrStatus.Message = msg
		return statusError
	}
	// If this isn't a StatusError return a more generic error with the object details.
	if obj != nil {
		return errors.Errorf("failed to create %s", obj.GetObjectKind().GroupVersionKind().GroupKind().String())
	}
	return errors.New("failed to create object")
}