sigs.k8s.io/cluster-api@v1.7.1/internal/controllers/machinedeployment/machinedeployment_sync.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machinedeployment

import (
    "context"
    "fmt"
    "sort"
    "time"

    "github.com/pkg/errors"
    corev1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    kerrors "k8s.io/apimachinery/pkg/util/errors"
    apirand "k8s.io/apimachinery/pkg/util/rand"
    "k8s.io/apimachinery/pkg/util/sets"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/klog/v2"
    "k8s.io/utils/ptr"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client"

    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
    "sigs.k8s.io/cluster-api/internal/util/hash"
    "sigs.k8s.io/cluster-api/internal/util/ssa"
    "sigs.k8s.io/cluster-api/util/conditions"
    "sigs.k8s.io/cluster-api/util/patch"
)

// sync is responsible for reconciling deployments on scaling events or when they
// are paused.
func (r *Reconciler) sync(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) error {
    newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, false)
    if err != nil {
        return err
    }

    if err := r.scale(ctx, md, newMS, oldMSs); err != nil {
        // If we get an error while trying to scale, the deployment will be requeued
        // so we can abort this resync
        return err
    }

    // TODO: Clean up the deployment when it's paused and no rollback is in flight.
    allMSs := append(oldMSs, newMS)
    return r.syncDeploymentStatus(allMSs, newMS, md)
}

// getAllMachineSetsAndSyncRevision returns all the machine sets for the provided deployment (new and all old), with new MS's and deployment's revision updated.
//
// msList should come from getMachineSetsForDeployment(d).
// machineMap should come from getMachineMapForDeployment(d, msList).
//
//  1. Get all old MSes this deployment targets, and calculate the max revision number among them (maxOldV).
//  2. Get new MS this deployment targets (whose machine template matches deployment's), and update new MS's revision number to (maxOldV + 1),
//     only if its revision number is smaller than (maxOldV + 1). If this step failed, we'll update it in the next deployment sync loop.
//  3. Copy new MS's revision number to deployment (update deployment's revision). If this step failed, we'll update it in the next deployment sync loop.
//
// Note that currently the deployment controller is using caches to avoid querying the server for reads.
// This may lead to stale reads of machine sets, thus incorrect deployment status.
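// Note: sync above calls this with createIfNotExisted=false, so a scaling-only or paused reconcile never
// creates a new MachineSet here; it only finds and updates the matching one (or returns nil), see getNewMachineSet.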
func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, []*clusterv1.MachineSet, error) {
    reconciliationTime := metav1.Now()
    allOldMSs := mdutil.FindOldMachineSets(md, msList, &reconciliationTime)

    // Get new machine set with the updated revision number
    newMS, err := r.getNewMachineSet(ctx, md, msList, allOldMSs, createIfNotExisted, &reconciliationTime)
    if err != nil {
        return nil, nil, err
    }

    return newMS, allOldMSs, nil
}

// getNewMachineSet returns a MachineSet that matches the intent of the given MachineDeployment.
// If no such MachineSet exists and createIfNotExists is true, a new MachineSet is created.
// If there is already such a MachineSet, it is updated to propagate in-place mutable fields from the MachineDeployment.
func (r *Reconciler) getNewMachineSet(ctx context.Context, md *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExists bool, reconciliationTime *metav1.Time) (*clusterv1.MachineSet, error) {
    // Try to find a MachineSet which matches the MachineDeployment's intent, while ignoring diffs in
    // the in-place mutable fields.
    // If we find a matching MachineSet we just update it to propagate any changes to the in-place mutable
    // fields and thus we do not trigger an unnecessary rollout (i.e. create a new MachineSet).
    // If we don't find a matching MachineSet, we need a rollout and thus create a new MachineSet.
    // Note: The in-place mutable fields can be just updated inline, because they do not affect the actual machines
    // themselves (i.e. the infrastructure and the software running on the Machines, not the Machine object).
    matchingMS := mdutil.FindNewMachineSet(md, msList, reconciliationTime)

    // If there is a MachineSet that matches the intent of the MachineDeployment, update the MachineSet
    // to propagate all in-place mutable fields from the MachineDeployment to the MachineSet.
    if matchingMS != nil {
        updatedMS, err := r.updateMachineSet(ctx, md, matchingMS, oldMSs)
        if err != nil {
            return nil, err
        }

        // Ensure MachineDeployment has the latest MachineSet revision in its revision annotation.
        mdutil.SetDeploymentRevision(md, updatedMS.Annotations[clusterv1.RevisionAnnotation])
        return updatedMS, nil
    }

    if !createIfNotExists {
        return nil, nil
    }

    // Create a new MachineSet and wait until the new MachineSet exists in the cache.
    newMS, err := r.createMachineSetAndWait(ctx, md, oldMSs)
    if err != nil {
        return nil, err
    }

    mdutil.SetDeploymentRevision(md, newMS.Annotations[clusterv1.RevisionAnnotation])

    return newMS, nil
}

// updateMachineSet updates an existing MachineSet to propagate in-place mutable fields from the MachineDeployment.
func (r *Reconciler) updateMachineSet(ctx context.Context, deployment *clusterv1.MachineDeployment, ms *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet) (*clusterv1.MachineSet, error) {
    log := ctrl.LoggerFrom(ctx)

    // Compute the desired MachineSet.
    updatedMS, err := r.computeDesiredMachineSet(ctx, deployment, ms, oldMSs)
    if err != nil {
        return nil, errors.Wrapf(err, "failed to update MachineSet %q", klog.KObj(ms))
    }

    // Update the MachineSet to propagate in-place mutable fields from the MachineDeployment.
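    // Note: ssa.Patch below performs a Server-Side-Apply with this controller's field manager
    // (machineDeploymentManagerName). The ssa.WithCachingProxy option is given the unmodified original so the
    // helper can skip apply requests that are already known, via r.ssaCache, to result in no changes
    // (this description of the caching behavior is an assumption based on how Cache/Original are passed here).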
    err = ssa.Patch(ctx, r.Client, machineDeploymentManagerName, updatedMS, ssa.WithCachingProxy{Cache: r.ssaCache, Original: ms})
    if err != nil {
        r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedUpdate", "Failed to update MachineSet %s: %v", klog.KObj(updatedMS), err)
        return nil, errors.Wrapf(err, "failed to update MachineSet %s", klog.KObj(updatedMS))
    }

    log.V(4).Info("Updated MachineSet", "MachineSet", klog.KObj(updatedMS))
    return updatedMS, nil
}

// createMachineSetAndWait creates a new MachineSet with the desired intent of the MachineDeployment.
// It waits for the cache to be updated with the newly created MachineSet.
func (r *Reconciler) createMachineSetAndWait(ctx context.Context, deployment *clusterv1.MachineDeployment, oldMSs []*clusterv1.MachineSet) (*clusterv1.MachineSet, error) {
    log := ctrl.LoggerFrom(ctx)

    // Compute the desired MachineSet.
    newMS, err := r.computeDesiredMachineSet(ctx, deployment, nil, oldMSs)
    if err != nil {
        return nil, errors.Wrap(err, "failed to create new MachineSet")
    }

    // Create the MachineSet.
    if err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, newMS); err != nil {
        r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedCreate", "Failed to create MachineSet %s: %v", klog.KObj(newMS), err)
        return nil, errors.Wrapf(err, "failed to create new MachineSet %s", klog.KObj(newMS))
    }
    log.V(4).Info("Created new MachineSet", "MachineSet", klog.KObj(newMS))
    r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulCreate", "Created MachineSet %s", klog.KObj(newMS))

    // Keep trying to get the MachineSet. This will force the cache to update and prevent any future reconciliation of
    // the MachineDeployment from using an outdated list of MachineSets, which could lead to the unwanted creation of
    // a duplicate MachineSet.
    var pollErrors []error
    if err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 10*time.Second, true, func(ctx context.Context) (bool, error) {
        ms := &clusterv1.MachineSet{}
        if err := r.Client.Get(ctx, client.ObjectKeyFromObject(newMS), ms); err != nil {
            // Do not return the error here. Continue to poll even if we hit an error
            // so that we avoid exiting because of transient errors like network flakes.
            // Capture all the errors and return the aggregate error if the poll fails eventually.
            pollErrors = append(pollErrors, err)
            return false, nil
        }
        return true, nil
    }); err != nil {
        return nil, errors.Wrapf(kerrors.NewAggregate(pollErrors), "failed to get the MachineSet %s after creation", klog.KObj(newMS))
    }
    return newMS, nil
}

// computeDesiredMachineSet computes the desired MachineSet.
// This MachineSet will be used during reconciliation to:
// * create a MachineSet
// * update an existing MachineSet
// Because we are using Server-Side-Apply we always have to calculate the full object.
// There are small differences in how we calculate the MachineSet depending on if it
// is a create or update. Example: for a new MachineSet we have to calculate a new name,
// while for an existing MachineSet we have to use the name of the existing MachineSet.
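// If existingMS is nil, the desired MachineSet is computed for a create; otherwise it is computed as an
// in-place update of existingMS (name, UID, replicas and the machine template spec are carried over).
// Hypothetical example of the identity computed for the create case: a MachineDeployment named "md-1" may
// yield a MachineSet named "md-1-x7k2p" with the MachineDeploymentUniqueLabel value "<template-hash>-x7k2p",
// where "x7k2p" stands in for the random suffix.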
func (r *Reconciler) computeDesiredMachineSet(ctx context.Context, deployment *clusterv1.MachineDeployment, existingMS *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet) (*clusterv1.MachineSet, error) {
    var name string
    var uid types.UID
    var finalizers []string
    var uniqueIdentifierLabelValue string
    var machineTemplateSpec clusterv1.MachineSpec
    var replicas int32
    var err error

    // For a new MachineSet:
    // * compute a new uniqueIdentifier, a new MachineSet name, finalizers, replicas and
    //   the machine template spec (take the one from the MachineDeployment)
    if existingMS == nil {
        // Note: In previous Cluster API versions (< v1.4.0), the label value was the hash of the full machine
        // template. With the introduction of in-place mutation the machine template of the MachineSet can change.
        // Because of that, the label's value can no longer always be the hash of the full machine template
        // (the hash changes when the machine template changes).
        // As a result, we use the hash of the machine template while ignoring all in-place mutable fields, i.e. the
        // machine template with only the fields that could trigger a rollout, for the machine-template-hash, making it
        // independent of changes to any in-place mutable fields.
        templateHash, err := hash.Compute(mdutil.MachineTemplateDeepCopyRolloutFields(&deployment.Spec.Template))
        if err != nil {
            return nil, errors.Wrap(err, "failed to compute desired MachineSet: failed to compute machine template hash")
        }
        // Append a random string at the end of the template hash. This is required to distinguish MachineSets that
        // could be created with the same spec as a result of rolloutAfter. If not, computeDesiredMachineSet
        // will end up updating the existing MachineSet instead of creating a new one.
        var randomSuffix string
        name, randomSuffix = computeNewMachineSetName(deployment.Name + "-")
        uniqueIdentifierLabelValue = fmt.Sprintf("%d-%s", templateHash, randomSuffix)

        // Add the foregroundDeletion finalizer to the MachineSet if the MachineDeployment has it.
        if sets.New[string](deployment.Finalizers...).Has(metav1.FinalizerDeleteDependents) {
            finalizers = []string{metav1.FinalizerDeleteDependents}
        }

        replicas, err = mdutil.NewMSNewReplicas(deployment, oldMSs, 0)
        if err != nil {
            return nil, errors.Wrap(err, "failed to compute desired MachineSet")
        }

        machineTemplateSpec = *deployment.Spec.Template.Spec.DeepCopy()
    } else {
        // For updating an existing MachineSet:
        // * get the uniqueIdentifier from the labels of the existingMS
        // * use name, uid, finalizers, replicas and the machine template spec from the existingMS.
        // Note: We use the uid to ensure that the Server-Side-Apply only updates the existingMS.
        // Note: We carry over those fields because we don't want to mutate them for an existingMS.
        var uniqueIdentifierLabelExists bool
        uniqueIdentifierLabelValue, uniqueIdentifierLabelExists = existingMS.Labels[clusterv1.MachineDeploymentUniqueLabel]
        if !uniqueIdentifierLabelExists {
            return nil, errors.Errorf("failed to compute desired MachineSet: failed to get unique identifier from %q label",
                clusterv1.MachineDeploymentUniqueLabel)
        }

        name = existingMS.Name
        uid = existingMS.UID

        // Keep the foregroundDeletion finalizer if the existingMS has it.
        // Note: This case is a little different from the create case.
        // In the update case we preserve the finalizer on the MachineSet if it already exists: because of SSA
        // we should not build the finalizer information from the MachineDeployment when updating, as that could
        // drop the finalizer from the MachineSet whenever it is dropped from the MachineDeployment.
        if sets.New[string](existingMS.Finalizers...).Has(metav1.FinalizerDeleteDependents) {
            finalizers = []string{metav1.FinalizerDeleteDependents}
        }

        replicas = *existingMS.Spec.Replicas

        machineTemplateSpec = *existingMS.Spec.Template.Spec.DeepCopy()
    }

    // Construct the basic MachineSet.
    desiredMS := &clusterv1.MachineSet{
        TypeMeta: metav1.TypeMeta{
            APIVersion: clusterv1.GroupVersion.String(),
            Kind:       "MachineSet",
        },
        ObjectMeta: metav1.ObjectMeta{
            Name:      name,
            Namespace: deployment.Namespace,
            // Note: By setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet.
            OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(deployment, machineDeploymentKind)},
            UID:             uid,
            Finalizers:      finalizers,
        },
        Spec: clusterv1.MachineSetSpec{
            Replicas:    &replicas,
            ClusterName: deployment.Spec.ClusterName,
            Template: clusterv1.MachineTemplateSpec{
                Spec: machineTemplateSpec,
            },
        },
    }

    // Set the in-place mutable fields.
    // When we create a new MachineSet we will just create the MachineSet with those fields.
    // When we update an existing MachineSet we will update the fields on the existing MachineSet (in-place mutate).

    // Set labels and .spec.template.labels.
    desiredMS.Labels = mdutil.CloneAndAddLabel(deployment.Spec.Template.Labels,
        clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)
    // Always set the MachineDeploymentNameLabel.
    // Note: If a client tries to create a MachineDeployment without a selector, the MachineDeployment webhook
    // will add this label automatically. But we want this label to always be present even if the MachineDeployment
    // has a selector which doesn't include it. Therefore, we have to set it here explicitly.
    desiredMS.Labels[clusterv1.MachineDeploymentNameLabel] = deployment.Name
    desiredMS.Spec.Template.Labels = mdutil.CloneAndAddLabel(deployment.Spec.Template.Labels,
        clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)

    // Set the selector.
    desiredMS.Spec.Selector = *mdutil.CloneSelectorAndAddLabel(&deployment.Spec.Selector, clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)

    // Set annotations and .spec.template.annotations.
    if desiredMS.Annotations, err = mdutil.ComputeMachineSetAnnotations(ctx, deployment, oldMSs, existingMS); err != nil {
        return nil, errors.Wrap(err, "failed to compute desired MachineSet: failed to compute annotations")
    }
    desiredMS.Spec.Template.Annotations = cloneStringMap(deployment.Spec.Template.Annotations)

    // Set all other in-place mutable fields.
    desiredMS.Spec.MinReadySeconds = ptr.Deref(deployment.Spec.MinReadySeconds, 0)
    if deployment.Spec.Strategy != nil && deployment.Spec.Strategy.RollingUpdate != nil {
        desiredMS.Spec.DeletePolicy = ptr.Deref(deployment.Spec.Strategy.RollingUpdate.DeletePolicy, "")
    } else {
        desiredMS.Spec.DeletePolicy = ""
    }
    desiredMS.Spec.Template.Spec.NodeDrainTimeout = deployment.Spec.Template.Spec.NodeDrainTimeout
    desiredMS.Spec.Template.Spec.NodeDeletionTimeout = deployment.Spec.Template.Spec.NodeDeletionTimeout
    desiredMS.Spec.Template.Spec.NodeVolumeDetachTimeout = deployment.Spec.Template.Spec.NodeVolumeDetachTimeout

    return desiredMS, nil
}

// cloneStringMap clones a string map.
func cloneStringMap(in map[string]string) map[string]string {
    out := map[string]string{}
    for k, v := range in {
        out[k] = v
    }
    return out
}

const (
    maxNameLength          = 63
    randomLength           = 5
    maxGeneratedNameLength = maxNameLength - randomLength
)

// computeNewMachineSetName generates a new name for the MachineSet just like
// the upstream SimpleNameGenerator.
// Note: We had to extract the logic as we want to use the MachineSet name suffix as
// a unique identifier for the MachineSet.
func computeNewMachineSetName(base string) (string, string) {
    if len(base) > maxGeneratedNameLength {
        base = base[:maxGeneratedNameLength]
    }
    r := apirand.String(randomLength)
    return fmt.Sprintf("%s%s", base, r), r
}

// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size
// of the new machine set and scaling down can decrease the sizes of the old ones, both of which would
// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable
// replicas in the event of a problem with the rolled out template. Should run only on scaling events or
// when a deployment is paused and not during the normal rollout process.
func (r *Reconciler) scale(ctx context.Context, deployment *clusterv1.MachineDeployment, newMS *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet) error {
    log := ctrl.LoggerFrom(ctx)

    if deployment.Spec.Replicas == nil {
        return errors.Errorf("spec replicas for deployment %v is nil, this is unexpected", deployment.Name)
    }

    // If there is only one active machine set then we should scale that up to the full count of the
    // deployment. If there is no active machine set, then we should scale up the newest machine set.
    if activeOrLatest := mdutil.FindOneActiveOrLatest(newMS, oldMSs); activeOrLatest != nil {
        if activeOrLatest.Spec.Replicas == nil {
            return errors.Errorf("spec replicas for machine set %v is nil, this is unexpected", activeOrLatest.Name)
        }

        if *(activeOrLatest.Spec.Replicas) == *(deployment.Spec.Replicas) {
            return nil
        }

        err := r.scaleMachineSet(ctx, activeOrLatest, *(deployment.Spec.Replicas), deployment)
        return err
    }

    // If the new machine set is saturated, old machine sets should be fully scaled down.
    // This case handles machine set adoption during a saturated new machine set.
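    // Note: "saturated" is determined by mdutil.IsSaturated; roughly, the new MachineSet's spec.replicas,
    // its desired-replicas annotation and its available replicas all already match the MachineDeployment's
    // spec.replicas (this summary of the helper's check is an assumption, see mdutil for the exact logic).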
    if mdutil.IsSaturated(deployment, newMS) {
        for _, old := range mdutil.FilterActiveMachineSets(oldMSs) {
            if err := r.scaleMachineSet(ctx, old, 0, deployment); err != nil {
                return err
            }
        }
        return nil
    }

    // There are old machine sets with machines and the new machine set is not saturated.
    // We need to proportionally scale all machine sets (new and old) in case of a
    // rolling deployment.
    if mdutil.IsRollingUpdate(deployment) {
        allMSs := mdutil.FilterActiveMachineSets(append(oldMSs, newMS))
        totalMSReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)

        allowedSize := int32(0)
        if *(deployment.Spec.Replicas) > 0 {
            allowedSize = *(deployment.Spec.Replicas) + mdutil.MaxSurge(*deployment)
        }

        // Number of additional replicas that can be either added or removed from the total
        // replicas count. These replicas should be distributed proportionally to the active
        // machine sets.
        deploymentReplicasToAdd := allowedSize - totalMSReplicas

        // The additional replicas should be distributed proportionally amongst the active
        // machine sets, from the largest to the smallest machine set. Scaling direction
        // drives what happens in case we are trying to scale machine sets of the same size.
        // In such a case when scaling up, we should scale up newer machine sets first, and
        // when scaling down, we should scale down older machine sets first.
        switch {
        case deploymentReplicasToAdd > 0:
            sort.Sort(mdutil.MachineSetsBySizeNewer(allMSs))
        case deploymentReplicasToAdd < 0:
            sort.Sort(mdutil.MachineSetsBySizeOlder(allMSs))
        }

        // Iterate over all active machine sets and estimate proportions for each of them.
        // The absolute value of deploymentReplicasAdded should never exceed the absolute
        // value of deploymentReplicasToAdd.
        deploymentReplicasAdded := int32(0)
        nameToSize := make(map[string]int32)
        for i := range allMSs {
            ms := allMSs[i]
            if ms.Spec.Replicas == nil {
                log.Info("Spec.Replicas for machine set is nil, this is unexpected.", "MachineSet", ms.Name)
                continue
            }

            // Estimate proportions if we have replicas to add, otherwise simply populate
            // nameToSize with the current sizes for each machine set.
            if deploymentReplicasToAdd != 0 {
                proportion := mdutil.GetProportion(ms, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded, log)
                nameToSize[ms.Name] = *(ms.Spec.Replicas) + proportion
                deploymentReplicasAdded += proportion
            } else {
                nameToSize[ms.Name] = *(ms.Spec.Replicas)
            }
        }

        // Update all machine sets.
        for i := range allMSs {
            ms := allMSs[i]

            // Add/remove any leftovers to the largest machine set.
            if i == 0 && deploymentReplicasToAdd != 0 {
                leftover := deploymentReplicasToAdd - deploymentReplicasAdded
                nameToSize[ms.Name] += leftover
                if nameToSize[ms.Name] < 0 {
                    nameToSize[ms.Name] = 0
                }
            }

            if err := r.scaleMachineSet(ctx, ms, nameToSize[ms.Name], deployment); err != nil {
                // Return as soon as we fail, the deployment is requeued
                return err
            }
        }
    }

    return nil
}

// syncDeploymentStatus checks if the status is up-to-date and syncs it if necessary.
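// Note: it only mutates md.Status and conditions in memory; persisting the MachineDeployment is left to the caller.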
func (r *Reconciler) syncDeploymentStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, md *clusterv1.MachineDeployment) error {
    md.Status = calculateStatus(allMSs, newMS, md)

    // minReplicasNeeded will be equal to md.Spec.Replicas when the strategy is not RollingUpdateMachineDeploymentStrategyType.
    minReplicasNeeded := *(md.Spec.Replicas) - mdutil.MaxUnavailable(*md)

    if md.Status.AvailableReplicas >= minReplicasNeeded {
        // NOTE: The structure of calculateStatus() does not allow us to update the MachineDeployment directly, we can only update the status object it returns.
        // Ideally, we should change calculateStatus() --> updateStatus() to be consistent with the rest of the code base; until then, we update conditions here.
        conditions.MarkTrue(md, clusterv1.MachineDeploymentAvailableCondition)
    } else {
        conditions.MarkFalse(md, clusterv1.MachineDeploymentAvailableCondition, clusterv1.WaitingForAvailableMachinesReason, clusterv1.ConditionSeverityWarning, "Minimum availability requires %d replicas, current %d available", minReplicasNeeded, md.Status.AvailableReplicas)
    }

    if newMS != nil {
        // Report a summary of the current status of the MachineSet object owned by this MachineDeployment.
        conditions.SetMirror(md, clusterv1.MachineSetReadyCondition,
            newMS,
            conditions.WithFallbackValue(false, clusterv1.WaitingForMachineSetFallbackReason, clusterv1.ConditionSeverityInfo, ""),
        )
    } else {
        conditions.MarkFalse(md, clusterv1.MachineSetReadyCondition, clusterv1.WaitingForMachineSetFallbackReason, clusterv1.ConditionSeverityInfo, "MachineSet not found")
    }

    return nil
}

// calculateStatus calculates the latest status for the provided deployment by looking into the provided MachineSets.
func calculateStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) clusterv1.MachineDeploymentStatus {
    availableReplicas := mdutil.GetAvailableReplicaCountForMachineSets(allMSs)
    totalReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)
    unavailableReplicas := totalReplicas - availableReplicas

    // If unavailableReplicas is negative, then that means the Deployment has more available replicas running than
    // desired, e.g. whenever it scales down. In such a case we should simply default unavailableReplicas to zero.
    if unavailableReplicas < 0 {
        unavailableReplicas = 0
    }

    // Calculate the label selector. We check the error in the MD reconcile function, ignore it here.
    selector, _ := metav1.LabelSelectorAsSelector(&deployment.Spec.Selector)

    status := clusterv1.MachineDeploymentStatus{
        // TODO: Ensure that if we start retrying status updates, we won't pick up a new Generation value.
        ObservedGeneration:  deployment.Generation,
        Selector:            selector.String(),
        Replicas:            mdutil.GetActualReplicaCountForMachineSets(allMSs),
        UpdatedReplicas:     mdutil.GetActualReplicaCountForMachineSets([]*clusterv1.MachineSet{newMS}),
        ReadyReplicas:       mdutil.GetReadyReplicaCountForMachineSets(allMSs),
        AvailableReplicas:   availableReplicas,
        UnavailableReplicas: unavailableReplicas,
        Conditions:          deployment.Status.Conditions,
    }

    if *deployment.Spec.Replicas == status.ReadyReplicas {
        status.Phase = string(clusterv1.MachineDeploymentPhaseRunning)
    }
    if *deployment.Spec.Replicas > status.ReadyReplicas {
        status.Phase = string(clusterv1.MachineDeploymentPhaseScalingUp)
    }
    // This is the same as unavailableReplicas, but we have to recalculate because unavailableReplicas
    // would have been reset to zero above if it was negative.
    if totalReplicas-availableReplicas < 0 {
        status.Phase = string(clusterv1.MachineDeploymentPhaseScalingDown)
    }
    for _, ms := range allMSs {
        if ms != nil {
            if ms.Status.FailureReason != nil || ms.Status.FailureMessage != nil {
                status.Phase = string(clusterv1.MachineDeploymentPhaseFailed)
                break
            }
        }
    }
    return status
}

func (r *Reconciler) scaleMachineSet(ctx context.Context, ms *clusterv1.MachineSet, newScale int32, deployment *clusterv1.MachineDeployment) error {
    if ms.Spec.Replicas == nil {
        return errors.Errorf("spec.replicas for MachineSet %v is nil, this is unexpected", client.ObjectKeyFromObject(ms))
    }

    if deployment.Spec.Replicas == nil {
        return errors.Errorf("spec.replicas for MachineDeployment %v is nil, this is unexpected", client.ObjectKeyFromObject(deployment))
    }

    annotationsNeedUpdate := mdutil.ReplicasAnnotationsNeedUpdate(
        ms,
        *(deployment.Spec.Replicas),
        *(deployment.Spec.Replicas)+mdutil.MaxSurge(*deployment),
    )

    // No need to scale or to set annotations, return.
    if *(ms.Spec.Replicas) == newScale && !annotationsNeedUpdate {
        return nil
    }

    // If we're here, a scaling operation is required.
    patchHelper, err := patch.NewHelper(ms, r.Client)
    if err != nil {
        return err
    }

    // Save the original replicas to log in the event.
    originalReplicas := *(ms.Spec.Replicas)

    // Mutate replicas and the related annotation.
    ms.Spec.Replicas = &newScale
    mdutil.SetReplicasAnnotations(ms, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+mdutil.MaxSurge(*deployment))

    if err := patchHelper.Patch(ctx, ms); err != nil {
        r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedScale", "Failed to scale MachineSet %v: %v",
            client.ObjectKeyFromObject(ms), err)
        return err
    }

    r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulScale", "Scaled MachineSet %v: %d -> %d",
        client.ObjectKeyFromObject(ms), originalReplicas, *ms.Spec.Replicas)

    return nil
}

// cleanupDeployment is responsible for cleaning up a deployment, i.e. it deletes all but the latest N old machine sets,
// where N=d.Spec.RevisionHistoryLimit. Old machine sets represent older versions of the machine template of a deployment
// and are kept around by default 1) for historical reasons and 2) for the ability to roll back a deployment.
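// Only MachineSets that are fully scaled down (spec and status replicas are zero), have been observed at their
// current generation, and are not already being deleted are actually removed (see the checks below).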
func (r *Reconciler) cleanupDeployment(ctx context.Context, oldMSs []*clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) error {
    log := ctrl.LoggerFrom(ctx)

    if deployment.Spec.RevisionHistoryLimit == nil {
        return nil
    }

    // Avoid deleting machine sets with the deletion timestamp set.
    aliveFilter := func(ms *clusterv1.MachineSet) bool {
        return ms != nil && ms.ObjectMeta.DeletionTimestamp.IsZero()
    }

    cleanableMSes := mdutil.FilterMachineSets(oldMSs, aliveFilter)

    diff := int32(len(cleanableMSes)) - *deployment.Spec.RevisionHistoryLimit
    if diff <= 0 {
        return nil
    }

    sort.Sort(mdutil.MachineSetsByCreationTimestamp(cleanableMSes))
    log.V(4).Info("Looking to cleanup old machine sets for deployment")

    for i := int32(0); i < diff; i++ {
        ms := cleanableMSes[i]
        if ms.Spec.Replicas == nil {
            return errors.Errorf("spec replicas for machine set %v is nil, this is unexpected", ms.Name)
        }

        // Avoid deleting machine sets with non-zero replica counts.
        if ms.Status.Replicas != 0 || *(ms.Spec.Replicas) != 0 || ms.Generation > ms.Status.ObservedGeneration || !ms.DeletionTimestamp.IsZero() {
            continue
        }

        log.V(4).Info("Trying to cleanup machine set for deployment", "machineset", ms.Name)
        if err := r.Client.Delete(ctx, ms); err != nil && !apierrors.IsNotFound(err) {
            // Return the error instead of aggregating and continuing DELETEs on the theory
            // that we may be overloading the api server.
            r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedDelete", "Failed to delete MachineSet %q: %v", ms.Name, err)
            return err
        }
        r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted MachineSet %q", ms.Name)
    }

    return nil
}