sigs.k8s.io/cluster-api@v1.6.3/internal/controllers/machinedeployment/machinedeployment_sync.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package machinedeployment

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/go-logr/logr"
	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	kerrors "k8s.io/apimachinery/pkg/util/errors"
	apirand "k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/util/retry"
	"k8s.io/klog/v2"
	"k8s.io/utils/pointer"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil"
	"sigs.k8s.io/cluster-api/internal/util/hash"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
)

// sync is responsible for reconciling deployments on scaling events or when they
// are paused.
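// The overall flow is: resolve the new and old MachineSets for the deployment (syncing the revision
// annotation), reconcile their scale, and then recompute the MachineDeployment status from all MachineSets.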
func (r *Reconciler) sync(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet) error {
	newMS, oldMSs, err := r.getAllMachineSetsAndSyncRevision(ctx, md, msList, false)
	if err != nil {
		return err
	}

	if err := r.scale(ctx, md, newMS, oldMSs); err != nil {
		// If we get an error while trying to scale, the deployment will be requeued
		// so we can abort this resync.
		return err
	}

	// TODO: Clean up the deployment when it's paused and no rollback is in flight.
	allMSs := append(oldMSs, newMS)
	return r.syncDeploymentStatus(allMSs, newMS, md)
}

// getAllMachineSetsAndSyncRevision returns all the MachineSets for the provided deployment (new and all old), with the new MachineSet's and the deployment's revision updated.
//
// msList should come from getMachineSetsForDeployment(d).
//
//  1. Get all old MachineSets this deployment targets, and calculate the max revision number among them (maxOldV).
//  2. Get the new MachineSet this deployment targets (whose machine template matches the deployment's), and update the new MachineSet's revision number to (maxOldV + 1),
//     only if its revision number is smaller than (maxOldV + 1). If this step fails, we'll update it in the next deployment sync loop.
//  3. Copy the new MachineSet's revision number to the deployment (update the deployment's revision). If this step fails, we'll update it in the next deployment sync loop.
//
// Note that currently the deployment controller is using caches to avoid querying the server for reads.
// This may lead to stale reads of machine sets, thus incorrect deployment status.
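//
// For example (illustrative revisions only): if the old MachineSets carry revisions "1" and "2",
// maxOldV is 2, the new MachineSet's revision annotation becomes "3", and the MachineDeployment's
// revision is updated to "3" as well.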
func (r *Reconciler) getAllMachineSetsAndSyncRevision(ctx context.Context, md *clusterv1.MachineDeployment, msList []*clusterv1.MachineSet, createIfNotExisted bool) (*clusterv1.MachineSet, []*clusterv1.MachineSet, error) {
	reconciliationTime := metav1.Now()
	allOldMSs := mdutil.FindOldMachineSets(md, msList, &reconciliationTime)

	// Get new machine set with the updated revision number.
	newMS, err := r.getNewMachineSet(ctx, md, msList, allOldMSs, createIfNotExisted, &reconciliationTime)
	if err != nil {
		return nil, nil, err
	}

	return newMS, allOldMSs, nil
}

// getNewMachineSet returns a MachineSet that matches the intent of the given MachineDeployment.
// If no such MachineSet exists and createIfNotExists is true, a new MachineSet is created.
// If such a MachineSet already exists, it is updated to propagate in-place mutable fields from the MachineDeployment.
func (r *Reconciler) getNewMachineSet(ctx context.Context, md *clusterv1.MachineDeployment, msList, oldMSs []*clusterv1.MachineSet, createIfNotExists bool, reconciliationTime *metav1.Time) (*clusterv1.MachineSet, error) {
	// Try to find a MachineSet which matches the MachineDeployment's intent, while ignoring diffs in
	// the in-place mutable fields.
	// If we find a matching MachineSet we just update it to propagate any changes to the in-place mutable
	// fields and thus we do not trigger an unnecessary rollout (i.e. create a new MachineSet).
	// If we don't find a matching MachineSet, we need a rollout and thus create a new MachineSet.
	// Note: The in-place mutable fields can simply be updated inline, because they do not affect the actual machines
	// themselves (i.e. the infrastructure and the software running on the Machines, not the Machine objects).
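	//
	// For example, fields like nodeDrainTimeout, nodeDeletionTimeout, nodeVolumeDetachTimeout, minReadySeconds and
	// deletePolicy are propagated in place (see computeDesiredMachineSet below), while a change to the rollout-relevant
	// parts of the machine template means no MachineSet matches and a new one is created instead.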
	matchingMS := mdutil.FindNewMachineSet(md, msList, reconciliationTime)

	// If there is a MachineSet that matches the intent of the MachineDeployment, update the MachineSet
	// to propagate all in-place mutable fields from MachineDeployment to the MachineSet.
	if matchingMS != nil {
		updatedMS, err := r.updateMachineSet(ctx, md, matchingMS, oldMSs)
		if err != nil {
			return nil, err
		}

		// Ensure MachineDeployment has the latest MachineSet revision in its revision annotation.
		err = r.updateMachineDeployment(ctx, md, func(innerDeployment *clusterv1.MachineDeployment) {
			mdutil.SetDeploymentRevision(md, updatedMS.Annotations[clusterv1.RevisionAnnotation])
		})
		if err != nil {
			return nil, errors.Wrap(err, "failed to update revision annotation on MachineDeployment")
		}
		return updatedMS, nil
	}

	if !createIfNotExists {
		return nil, nil
	}

	// Create a new MachineSet and wait until the new MachineSet exists in the cache.
	newMS, err := r.createMachineSetAndWait(ctx, md, oldMSs)
	if err != nil {
		return nil, err
	}

	// Ensure MachineDeployment has the latest MachineSet revision in its revision annotation.
	err = r.updateMachineDeployment(ctx, md, func(innerDeployment *clusterv1.MachineDeployment) {
		mdutil.SetDeploymentRevision(md, newMS.Annotations[clusterv1.RevisionAnnotation])
	})
	if err != nil {
		return nil, errors.Wrap(err, "failed to update revision annotation on MachineDeployment")
	}

	return newMS, nil
}

// updateMachineSet updates an existing MachineSet to propagate in-place mutable fields from the MachineDeployment.
func (r *Reconciler) updateMachineSet(ctx context.Context, deployment *clusterv1.MachineDeployment, ms *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet) (*clusterv1.MachineSet, error) {
	log := ctrl.LoggerFrom(ctx)

	// Compute the desired MachineSet.
	updatedMS, err := r.computeDesiredMachineSet(deployment, ms, oldMSs, log)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to update MachineSet %q", klog.KObj(ms))
	}

	// Update the MachineSet to propagate in-place mutable fields from the MachineDeployment.
	err = ssa.Patch(ctx, r.Client, machineDeploymentManagerName, updatedMS, ssa.WithCachingProxy{Cache: r.ssaCache, Original: ms})
	if err != nil {
		r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedUpdate", "Failed to update MachineSet %s: %v", klog.KObj(updatedMS), err)
		return nil, errors.Wrapf(err, "failed to update MachineSet %s", klog.KObj(updatedMS))
	}

	log.V(4).Info("Updated MachineSet", "MachineSet", klog.KObj(updatedMS))
	return updatedMS, nil
}

// createMachineSetAndWait creates a new MachineSet with the desired intent of the MachineDeployment.
// It waits for the cache to be updated with the newly created MachineSet.
func (r *Reconciler) createMachineSetAndWait(ctx context.Context, deployment *clusterv1.MachineDeployment, oldMSs []*clusterv1.MachineSet) (*clusterv1.MachineSet, error) {
	log := ctrl.LoggerFrom(ctx)

	// Compute the desired MachineSet.
	newMS, err := r.computeDesiredMachineSet(deployment, nil, oldMSs, log)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create new MachineSet")
	}

	// Create the MachineSet.
	if err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, newMS); err != nil {
		r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedCreate", "Failed to create MachineSet %s: %v", klog.KObj(newMS), err)
		return nil, errors.Wrapf(err, "failed to create new MachineSet %s", klog.KObj(newMS))
	}
	log.V(4).Info("Created new MachineSet", "MachineSet", klog.KObj(newMS))
	r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulCreate", "Created MachineSet %s", klog.KObj(newMS))

	// Keep trying to get the MachineSet. This will force the cache to update and prevent future reconciliations of
	// the MachineDeployment from running with an outdated list of MachineSets, which could lead to the unwanted
	// creation of a duplicate MachineSet.
	var pollErrors []error
	if err := wait.PollUntilContextTimeout(ctx, 100*time.Millisecond, 10*time.Second, true, func(ctx context.Context) (bool, error) {
		ms := &clusterv1.MachineSet{}
		if err := r.Client.Get(ctx, client.ObjectKeyFromObject(newMS), ms); err != nil {
			// Do not return an error here. Continue to poll even if we hit an error
			// so that we avoid exiting because of transient errors like network flakes.
			// Capture all the errors and return the aggregate error if the poll fails eventually.
			pollErrors = append(pollErrors, err)
			return false, nil
		}
		return true, nil
	}); err != nil {
		return nil, errors.Wrapf(kerrors.NewAggregate(pollErrors), "failed to get the MachineSet %s after creation", klog.KObj(newMS))
	}
	return newMS, nil
}

// computeDesiredMachineSet computes the desired MachineSet.
// This MachineSet will be used during reconciliation to:
// * create a MachineSet
// * update an existing MachineSet
// Because we are using Server-Side-Apply we always have to calculate the full object.
// There are small differences in how we calculate the MachineSet depending on whether it
// is a create or an update. Example: for a new MachineSet we have to calculate a new name,
// while for an existing MachineSet we have to use the name of the existing MachineSet.
func (r *Reconciler) computeDesiredMachineSet(deployment *clusterv1.MachineDeployment, existingMS *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet, log logr.Logger) (*clusterv1.MachineSet, error) {
	var name string
	var uid types.UID
	var finalizers []string
	var uniqueIdentifierLabelValue string
	var machineTemplateSpec clusterv1.MachineSpec
	var replicas int32
	var err error

	// For a new MachineSet:
	// * compute a new uniqueIdentifier, a new MachineSet name, finalizers, replicas and
	//   the machine template spec (take the one from the MachineDeployment).
	if existingMS == nil {
		// Note: In previous Cluster API versions (< v1.4.0), the label value was the hash of the full machine
		// template. With the introduction of in-place mutation the machine template of the MachineSet can change.
		// Because of that, it is no longer possible for the label's value to always be the hash of the full machine template.
		// (Because the hash changes when the machine template changes.)
		// As a result, we use the hash of the machine template while ignoring all in-place mutable fields, i.e. the
		// machine template with only the fields that could trigger a rollout, for the machine-template-hash, making it
		// independent of changes to any in-place mutable fields.
		templateHash, err := hash.Compute(mdutil.MachineTemplateDeepCopyRolloutFields(&deployment.Spec.Template))
		if err != nil {
			return nil, errors.Wrap(err, "failed to compute desired MachineSet: failed to compute machine template hash")
		}
		// Append a random string at the end of the template hash. This is required to distinguish MachineSets that
		// could be created with the same spec as a result of rolloutAfter. If not, computeDesiredMachineSet
		// would end up updating the existing MachineSet instead of creating a new one.
		var randomSuffix string
		name, randomSuffix = computeNewMachineSetName(deployment.Name + "-")
		uniqueIdentifierLabelValue = fmt.Sprintf("%d-%s", templateHash, randomSuffix)
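		// For example (illustrative values only): for a MachineDeployment named "md-1", a template hash of
		// 3897262549 and a random suffix "7f2xq", the new MachineSet is named "md-1-7f2xq" and the
		// unique-identifier label value is "3897262549-7f2xq".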

		// Add the foregroundDeletion finalizer to the MachineSet if the MachineDeployment has it.
		if sets.New[string](deployment.Finalizers...).Has(metav1.FinalizerDeleteDependents) {
			finalizers = []string{metav1.FinalizerDeleteDependents}
		}

		replicas, err = mdutil.NewMSNewReplicas(deployment, oldMSs, 0)
		if err != nil {
			return nil, errors.Wrap(err, "failed to compute desired MachineSet")
		}

		machineTemplateSpec = *deployment.Spec.Template.Spec.DeepCopy()
	} else {
		// For updating an existing MachineSet:
		// * get the uniqueIdentifier from the labels of the existingMS
		// * use name, uid, finalizers, replicas and machine template spec from the existingMS.
		// Note: We use the uid to ensure that the Server-Side-Apply only updates the existingMS.
		// Note: We carry over those fields because we don't want to mutate them for an existingMS.
		var uniqueIdentifierLabelExists bool
		uniqueIdentifierLabelValue, uniqueIdentifierLabelExists = existingMS.Labels[clusterv1.MachineDeploymentUniqueLabel]
		if !uniqueIdentifierLabelExists {
			return nil, errors.Errorf("failed to compute desired MachineSet: failed to get unique identifier from %q label",
				clusterv1.MachineDeploymentUniqueLabel)
		}

		name = existingMS.Name
		uid = existingMS.UID

		// Keep the foregroundDeletion finalizer if the existingMS has it.
		// Note: This case is a little different from the create case. In the update case we preserve
		// the finalizer on the MachineSet if it already exists. Because of SSA we should not build
		// the finalizer information from the MachineDeployment when updating a MachineSet, because that could lead
		// to the finalizer being dropped from the MachineSet if it is dropped from the MachineDeployment.
		if sets.New[string](existingMS.Finalizers...).Has(metav1.FinalizerDeleteDependents) {
			finalizers = []string{metav1.FinalizerDeleteDependents}
		}

		replicas = *existingMS.Spec.Replicas

		machineTemplateSpec = *existingMS.Spec.Template.Spec.DeepCopy()
	}

	// Construct the basic MachineSet.
	desiredMS := &clusterv1.MachineSet{
		TypeMeta: metav1.TypeMeta{
			APIVersion: clusterv1.GroupVersion.String(),
			Kind:       "MachineSet",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: deployment.Namespace,
			// Note: By setting the ownerRef on creation we signal to the MachineSet controller that this is not a stand-alone MachineSet.
			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(deployment, machineDeploymentKind)},
			UID:             uid,
			Finalizers:      finalizers,
		},
		Spec: clusterv1.MachineSetSpec{
			Replicas:    &replicas,
			ClusterName: deployment.Spec.ClusterName,
			Template: clusterv1.MachineTemplateSpec{
				Spec: machineTemplateSpec,
			},
		},
	}

	// Set the in-place mutable fields.
	// When we create a new MachineSet we will just create the MachineSet with those fields.
	// When we update an existing MachineSet we will update the fields on the existing MachineSet (in-place mutate).

	// Set labels and .spec.template.labels.
	desiredMS.Labels = mdutil.CloneAndAddLabel(deployment.Spec.Template.Labels,
		clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)
	// Always set the MachineDeploymentNameLabel.
	// Note: If a client tries to create a MachineDeployment without a selector, the MachineDeployment webhook
	// will add this label automatically. But we want this label to always be present even if the MachineDeployment
	// has a selector which doesn't include it. Therefore, we have to set it here explicitly.
	desiredMS.Labels[clusterv1.MachineDeploymentNameLabel] = deployment.Name
	desiredMS.Spec.Template.Labels = mdutil.CloneAndAddLabel(deployment.Spec.Template.Labels,
		clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)

	// Set selector.
	desiredMS.Spec.Selector = *mdutil.CloneSelectorAndAddLabel(&deployment.Spec.Selector, clusterv1.MachineDeploymentUniqueLabel, uniqueIdentifierLabelValue)

	// Set annotations and .spec.template.annotations.
	if desiredMS.Annotations, err = mdutil.ComputeMachineSetAnnotations(log, deployment, oldMSs, existingMS); err != nil {
		return nil, errors.Wrap(err, "failed to compute desired MachineSet: failed to compute annotations")
	}
	desiredMS.Spec.Template.Annotations = cloneStringMap(deployment.Spec.Template.Annotations)

	// Set all other in-place mutable fields.
	desiredMS.Spec.MinReadySeconds = pointer.Int32Deref(deployment.Spec.MinReadySeconds, 0)
	if deployment.Spec.Strategy != nil && deployment.Spec.Strategy.RollingUpdate != nil {
		desiredMS.Spec.DeletePolicy = pointer.StringDeref(deployment.Spec.Strategy.RollingUpdate.DeletePolicy, "")
	} else {
		desiredMS.Spec.DeletePolicy = ""
	}
	desiredMS.Spec.Template.Spec.NodeDrainTimeout = deployment.Spec.Template.Spec.NodeDrainTimeout
	desiredMS.Spec.Template.Spec.NodeDeletionTimeout = deployment.Spec.Template.Spec.NodeDeletionTimeout
	desiredMS.Spec.Template.Spec.NodeVolumeDetachTimeout = deployment.Spec.Template.Spec.NodeVolumeDetachTimeout

	return desiredMS, nil
}

// cloneStringMap clones a string map.
func cloneStringMap(in map[string]string) map[string]string {
	out := map[string]string{}
	for k, v := range in {
		out[k] = v
	}
	return out
}

const (
	maxNameLength          = 63
	randomLength           = 5
	maxGeneratedNameLength = maxNameLength - randomLength
)

// computeNewMachineSetName generates a new name for the MachineSet just like
// the upstream SimpleNameGenerator.
// Note: We had to extract the logic as we want to use the MachineSet name suffix as
// a unique identifier for the MachineSet.
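// For example (illustrative suffix only): computeNewMachineSetName("md-1-") could return
// ("md-1-7f2xq", "7f2xq"); bases longer than 58 characters are truncated first so the generated
// name stays within the 63-character limit.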
func computeNewMachineSetName(base string) (string, string) {
	if len(base) > maxGeneratedNameLength {
		base = base[:maxGeneratedNameLength]
	}
	r := apirand.String(randomLength)
	return fmt.Sprintf("%s%s", base, r), r
}

// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size
// of the new machine set and scaling down can decrease the sizes of the old ones, both of which would
// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable
// replicas in the event of a problem with the rolled out template. It should run only on scaling events or
// when a deployment is paused, not during the normal rollout process.
func (r *Reconciler) scale(ctx context.Context, deployment *clusterv1.MachineDeployment, newMS *clusterv1.MachineSet, oldMSs []*clusterv1.MachineSet) error {
	log := ctrl.LoggerFrom(ctx)

	if deployment.Spec.Replicas == nil {
		return errors.Errorf("spec replicas for deployment %v is nil, this is unexpected", deployment.Name)
	}

	// If there is only one active machine set then we should scale that up to the full count of the
	// deployment. If there is no active machine set, then we should scale up the newest machine set.
	if activeOrLatest := mdutil.FindOneActiveOrLatest(newMS, oldMSs); activeOrLatest != nil {
		if activeOrLatest.Spec.Replicas == nil {
			return errors.Errorf("spec replicas for machine set %v is nil, this is unexpected", activeOrLatest.Name)
		}

		if *(activeOrLatest.Spec.Replicas) == *(deployment.Spec.Replicas) {
			return nil
		}

		err := r.scaleMachineSet(ctx, activeOrLatest, *(deployment.Spec.Replicas), deployment)
		return err
	}

	// If the new machine set is saturated, old machine sets should be fully scaled down.
	// This case handles machine set adoption during a saturated new machine set.
	if mdutil.IsSaturated(deployment, newMS) {
		for _, old := range mdutil.FilterActiveMachineSets(oldMSs) {
			if err := r.scaleMachineSet(ctx, old, 0, deployment); err != nil {
				return err
			}
		}
		return nil
	}

	// There are old machine sets with machines and the new machine set is not saturated.
	// We need to proportionally scale all machine sets (new and old) in case of a
	// rolling deployment.
	if mdutil.IsRollingUpdate(deployment) {
		allMSs := mdutil.FilterActiveMachineSets(append(oldMSs, newMS))
		totalMSReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)

		allowedSize := int32(0)
		if *(deployment.Spec.Replicas) > 0 {
			allowedSize = *(deployment.Spec.Replicas) + mdutil.MaxSurge(*deployment)
		}

		// Number of additional replicas that can be either added or removed from the total
		// replicas count. These replicas should be distributed proportionally to the active
		// machine sets.
		deploymentReplicasToAdd := allowedSize - totalMSReplicas

		// The additional replicas should be distributed proportionally amongst the active
		// machine sets, from the larger to the smaller machine set. Scaling direction
		// drives what happens in case we are trying to scale machine sets of the same size.
		// In such a case when scaling up, we should scale up newer machine sets first, and
		// when scaling down, we should scale down older machine sets first.
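		//
		// Illustrative example: with spec.replicas=10 and maxSurge=3, allowedSize is 13. If the active
		// MachineSets currently total 10 replicas, deploymentReplicasToAdd is 3 and those 3 replicas are
		// distributed proportionally, starting with the largest MachineSet (newest first on equal size).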
		switch {
		case deploymentReplicasToAdd > 0:
			sort.Sort(mdutil.MachineSetsBySizeNewer(allMSs))
		case deploymentReplicasToAdd < 0:
			sort.Sort(mdutil.MachineSetsBySizeOlder(allMSs))
		}

		// Iterate over all active machine sets and estimate proportions for each of them.
		// The absolute value of deploymentReplicasAdded should never exceed the absolute
		// value of deploymentReplicasToAdd.
		deploymentReplicasAdded := int32(0)
		nameToSize := make(map[string]int32)
		for i := range allMSs {
			ms := allMSs[i]
			if ms.Spec.Replicas == nil {
				log.Info("Spec.Replicas for machine set is nil, this is unexpected.", "MachineSet", ms.Name)
				continue
			}

			// Estimate proportions if we have replicas to add, otherwise simply populate
			// nameToSize with the current sizes for each machine set.
			if deploymentReplicasToAdd != 0 {
				proportion := mdutil.GetProportion(ms, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded, log)
				nameToSize[ms.Name] = *(ms.Spec.Replicas) + proportion
				deploymentReplicasAdded += proportion
			} else {
				nameToSize[ms.Name] = *(ms.Spec.Replicas)
			}
		}

		// Update all machine sets.
		for i := range allMSs {
			ms := allMSs[i]

			// Add/remove any leftovers to the largest machine set.
			if i == 0 && deploymentReplicasToAdd != 0 {
				leftover := deploymentReplicasToAdd - deploymentReplicasAdded
				nameToSize[ms.Name] += leftover
				if nameToSize[ms.Name] < 0 {
					nameToSize[ms.Name] = 0
				}
			}

			if err := r.scaleMachineSet(ctx, ms, nameToSize[ms.Name], deployment); err != nil {
				// Return as soon as we fail; the deployment is requeued.
				return err
			}
		}
	}

	return nil
}

// syncDeploymentStatus checks if the status is up-to-date and syncs it if necessary.
func (r *Reconciler) syncDeploymentStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, md *clusterv1.MachineDeployment) error {
	md.Status = calculateStatus(allMSs, newMS, md)

	// minReplicasNeeded will be equal to md.Spec.Replicas when the strategy is not RollingUpdateMachineDeploymentStrategyType.
	minReplicasNeeded := *(md.Spec.Replicas) - mdutil.MaxUnavailable(*md)
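	// For example: with md.Spec.Replicas=3 and maxUnavailable=1, minReplicasNeeded is 2, so the Available
	// condition is marked True once at least 2 replicas are available.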

	if md.Status.AvailableReplicas >= minReplicasNeeded {
		// NOTE: The structure of calculateStatus() does not allow us to update the MachineDeployment directly,
		// we can only update the status object it returns. Ideally, we should change calculateStatus() --> updateStatus()
		// to be consistent with the rest of the code base; until then, we update conditions here.
		conditions.MarkTrue(md, clusterv1.MachineDeploymentAvailableCondition)
	} else {
		conditions.MarkFalse(md, clusterv1.MachineDeploymentAvailableCondition, clusterv1.WaitingForAvailableMachinesReason, clusterv1.ConditionSeverityWarning, "Minimum availability requires %d replicas, current %d available", minReplicasNeeded, md.Status.AvailableReplicas)
	}
	return nil
}

// calculateStatus calculates the latest status for the provided deployment by looking into the provided MachineSets.
func calculateStatus(allMSs []*clusterv1.MachineSet, newMS *clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) clusterv1.MachineDeploymentStatus {
	availableReplicas := mdutil.GetAvailableReplicaCountForMachineSets(allMSs)
	totalReplicas := mdutil.GetReplicaCountForMachineSets(allMSs)
	unavailableReplicas := totalReplicas - availableReplicas

	// If unavailableReplicas is negative, then that means the Deployment has more available replicas running than
	// desired, e.g. whenever it scales down. In such a case we should simply default unavailableReplicas to zero.
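	// For example: if the MachineSets' specs currently ask for 3 replicas in total while 5 replicas are still
	// available during a scale down, unavailableReplicas would be -2 and is therefore clamped to 0.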
	if unavailableReplicas < 0 {
		unavailableReplicas = 0
	}

	// Calculate the label selector. We check the error in the MD reconcile function; ignore it here.
	selector, _ := metav1.LabelSelectorAsSelector(&deployment.Spec.Selector)

	status := clusterv1.MachineDeploymentStatus{
		// TODO: Ensure that if we start retrying status updates, we won't pick up a new Generation value.
		ObservedGeneration:  deployment.Generation,
		Selector:            selector.String(),
		Replicas:            mdutil.GetActualReplicaCountForMachineSets(allMSs),
		UpdatedReplicas:     mdutil.GetActualReplicaCountForMachineSets([]*clusterv1.MachineSet{newMS}),
		ReadyReplicas:       mdutil.GetReadyReplicaCountForMachineSets(allMSs),
		AvailableReplicas:   availableReplicas,
		UnavailableReplicas: unavailableReplicas,
		Conditions:          deployment.Status.Conditions,
	}

	if *deployment.Spec.Replicas == status.ReadyReplicas {
		status.Phase = string(clusterv1.MachineDeploymentPhaseRunning)
	}
	if *deployment.Spec.Replicas > status.ReadyReplicas {
		status.Phase = string(clusterv1.MachineDeploymentPhaseScalingUp)
	}
	// This is the same check as for unavailableReplicas, but we have to recalculate because
	// unavailableReplicas was reset to zero above if it was negative.
	if totalReplicas-availableReplicas < 0 {
		status.Phase = string(clusterv1.MachineDeploymentPhaseScalingDown)
	}
	for _, ms := range allMSs {
		if ms != nil {
			if ms.Status.FailureReason != nil || ms.Status.FailureMessage != nil {
				status.Phase = string(clusterv1.MachineDeploymentPhaseFailed)
				break
			}
		}
	}
	return status
}

func (r *Reconciler) scaleMachineSet(ctx context.Context, ms *clusterv1.MachineSet, newScale int32, deployment *clusterv1.MachineDeployment) error {
	if ms.Spec.Replicas == nil {
		return errors.Errorf("spec.replicas for MachineSet %v is nil, this is unexpected", client.ObjectKeyFromObject(ms))
	}

	if deployment.Spec.Replicas == nil {
		return errors.Errorf("spec.replicas for MachineDeployment %v is nil, this is unexpected", client.ObjectKeyFromObject(deployment))
	}

	annotationsNeedUpdate := mdutil.ReplicasAnnotationsNeedUpdate(
		ms,
		*(deployment.Spec.Replicas),
		*(deployment.Spec.Replicas)+mdutil.MaxSurge(*deployment),
	)

	// No need to scale or set annotations; return early.
	if *(ms.Spec.Replicas) == newScale && !annotationsNeedUpdate {
		return nil
	}

	// If we're here, a scaling operation is required.
	patchHelper, err := patch.NewHelper(ms, r.Client)
	if err != nil {
		return err
	}

	// Save the original replicas to log in the event.
	originalReplicas := *(ms.Spec.Replicas)

	// Mutate replicas and the related annotations.
	ms.Spec.Replicas = &newScale
	mdutil.SetReplicasAnnotations(ms, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+mdutil.MaxSurge(*deployment))

	if err := patchHelper.Patch(ctx, ms); err != nil {
		r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedScale", "Failed to scale MachineSet %v: %v",
			client.ObjectKeyFromObject(ms), err)
		return err
	}

	r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulScale", "Scaled MachineSet %v: %d -> %d",
		client.ObjectKeyFromObject(ms), originalReplicas, *ms.Spec.Replicas)

	return nil
}

// cleanupDeployment is responsible for cleaning up a deployment, i.e. it deletes all but the latest
// N old machine sets, where N=d.Spec.RevisionHistoryLimit. Old machine sets are older versions of the
// machine template of a deployment, kept around by default 1) for historical reasons and 2) for the ability to roll back a deployment.
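//
// For example: with revisionHistoryLimit=1 and three old, fully scaled-down MachineSets, the two oldest
// (by creation timestamp) are deleted and only the most recent old MachineSet is kept.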
func (r *Reconciler) cleanupDeployment(ctx context.Context, oldMSs []*clusterv1.MachineSet, deployment *clusterv1.MachineDeployment) error {
	log := ctrl.LoggerFrom(ctx)

	if deployment.Spec.RevisionHistoryLimit == nil {
		return nil
	}

	// Avoid deleting machine sets with a deletion timestamp set.
	aliveFilter := func(ms *clusterv1.MachineSet) bool {
		return ms != nil && ms.ObjectMeta.DeletionTimestamp.IsZero()
	}

	cleanableMSes := mdutil.FilterMachineSets(oldMSs, aliveFilter)

	diff := int32(len(cleanableMSes)) - *deployment.Spec.RevisionHistoryLimit
	if diff <= 0 {
		return nil
	}

	sort.Sort(mdutil.MachineSetsByCreationTimestamp(cleanableMSes))
	log.V(4).Info("Looking to cleanup old machine sets for deployment")

	for i := int32(0); i < diff; i++ {
		ms := cleanableMSes[i]
		if ms.Spec.Replicas == nil {
			return errors.Errorf("spec replicas for machine set %v is nil, this is unexpected", ms.Name)
		}

		// Avoid deleting machine sets with non-zero replica counts.
		if ms.Status.Replicas != 0 || *(ms.Spec.Replicas) != 0 || ms.Generation > ms.Status.ObservedGeneration || !ms.DeletionTimestamp.IsZero() {
			continue
		}

		log.V(4).Info("Trying to cleanup machine set for deployment", "machineset", ms.Name)
		if err := r.Client.Delete(ctx, ms); err != nil && !apierrors.IsNotFound(err) {
			// Return the error instead of aggregating and continuing DELETEs on the theory
			// that we may be overloading the api server.
			r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedDelete", "Failed to delete MachineSet %q: %v", ms.Name, err)
			return err
		}
		r.recorder.Eventf(deployment, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted MachineSet %q", ms.Name)
	}

	return nil
}

func (r *Reconciler) updateMachineDeployment(ctx context.Context, md *clusterv1.MachineDeployment, modify func(*clusterv1.MachineDeployment)) error {
	return updateMachineDeployment(ctx, r.Client, md, modify)
}

// updateMachineDeployment exists as a standalone variant so it can also be used from the tests.
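//
// A minimal usage sketch (mirroring the callers above, with an illustrative revision value):
//
//	err := updateMachineDeployment(ctx, c, md, func(md *clusterv1.MachineDeployment) {
//		mdutil.SetDeploymentRevision(md, "3")
//	})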
func updateMachineDeployment(ctx context.Context, c client.Client, md *clusterv1.MachineDeployment, modify func(*clusterv1.MachineDeployment)) error {
	mdObjectKey := util.ObjectKey(md)
	return retry.RetryOnConflict(retry.DefaultBackoff, func() error {
		// Note: We intentionally don't re-use the passed in MachineDeployment md here as that would
		// overwrite any local changes we might have previously made to the MachineDeployment with the version
		// we get here from the apiserver.
		md := &clusterv1.MachineDeployment{}
		if err := c.Get(ctx, mdObjectKey, md); err != nil {
			return err
		}
		patchHelper, err := patch.NewHelper(md, c)
		if err != nil {
			return err
		}
		modify(md)
		return patchHelper.Patch(ctx, md)
	})
}