k8s.io/kubernetes@v1.29.3/pkg/controller/deployment/rolling.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package deployment

import (
	"context"
	"fmt"
	"sort"

	apps "k8s.io/api/apps/v1"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/controller"
	deploymentutil "k8s.io/kubernetes/pkg/controller/deployment/util"
	"k8s.io/utils/integer"
)

// rolloutRolling implements the logic for rolling a new replica set.
func (dc *DeploymentController) rolloutRolling(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) error {
	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, true)
	if err != nil {
		return err
	}
	allRSs := append(oldRSs, newRS)

	// Scale up, if we can.
	scaledUp, err := dc.reconcileNewReplicaSet(ctx, allRSs, newRS, d)
	if err != nil {
		return err
	}
	if scaledUp {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
	}

	// Scale down, if we can.
	scaledDown, err := dc.reconcileOldReplicaSets(ctx, allRSs, controller.FilterActiveReplicaSets(oldRSs), newRS, d)
	if err != nil {
		return err
	}
	if scaledDown {
		// Update DeploymentStatus
		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
	}

	if deploymentutil.DeploymentComplete(d, &d.Status) {
		if err := dc.cleanupDeployment(ctx, oldRSs, d); err != nil {
			return err
		}
	}

	// Sync deployment status
	return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
}
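
// For orientation, a minimal sketch (not part of this file) of the Deployment
// strategy fields that the rollout loop above enforces. The intstr package is
// assumed to be imported as "k8s.io/apimachinery/pkg/util/intstr"; the values
// are illustrative only:
//
//	maxSurge := intstr.FromInt(3)
//	maxUnavailable := intstr.FromString("25%")
//	strategy := apps.DeploymentStrategy{
//		Type: apps.RollingUpdateDeploymentStrategyType,
//		RollingUpdate: &apps.RollingUpdateDeployment{
//			MaxSurge:       &maxSurge,
//			MaxUnavailable: &maxUnavailable,
//		},
//	}
//
// reconcileNewReplicaSet may surge up to maxSurge pods above spec.replicas, while
// reconcileOldReplicaSets keeps at least spec.replicas - maxUnavailable pods
// available at all times.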

func (dc *DeploymentController) reconcileNewReplicaSet(ctx context.Context, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
	if *(newRS.Spec.Replicas) == *(deployment.Spec.Replicas) {
		// Scaling not required.
		return false, nil
	}
	if *(newRS.Spec.Replicas) > *(deployment.Spec.Replicas) {
		// Scale down.
		scaled, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, newRS, *(deployment.Spec.Replicas), deployment)
		return scaled, err
	}
	newReplicasCount, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, newRS)
	if err != nil {
		return false, err
	}
	scaled, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, newRS, newReplicasCount, deployment)
	return scaled, err
}
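
// To make the surge math concrete: a hedged sketch of what
// deploymentutil.NewRSNewReplicas computes for a RollingUpdate deployment
// (see that helper for the authoritative logic). The numbers below are
// hypothetical:
//
//	specReplicas := int32(10)               // *deployment.Spec.Replicas
//	maxSurge := int32(3)                    // resolved from the strategy
//	currentPodCount := int32(11)            // replicas across all RSes
//	newRSReplicas := int32(1)               // current new RS size
//	maxTotalPods := specReplicas + maxSurge // 13: the surge ceiling
//	scaleUp := integer.Int32Min(maxTotalPods-currentPodCount, specReplicas-newRSReplicas)
//	_ = newRSReplicas + scaleUp             // 3: the new RS may grow by 2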

func (dc *DeploymentController) reconcileOldReplicaSets(ctx context.Context, allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
	logger := klog.FromContext(ctx)
	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
	if oldPodsCount == 0 {
		// Can't scale down further
		return false, nil
	}
	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
	logger.V(4).Info("New replica set", "replicaSet", klog.KObj(newRS), "availableReplicas", newRS.Status.AvailableReplicas)
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)

	// Check if we can scale down. We can scale down in the following 2 cases:
	// * Some old replica sets have unhealthy replicas; we can safely scale down those unhealthy replicas since that won't further
	//   increase unavailability.
	// * The new replica set has scaled up and its replicas have become ready; then we can scale down old replica sets in a further step.
	//
	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
	// Take into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from
	// the newRS, so that the unavailable pods from the newRS would not make us scale down old replica sets in a further
	// step (which would increase unavailability).
	//
	// Concrete example:
	//
	// * 10 replicas
	// * 2 maxUnavailable (absolute number, not percent)
	// * 3 maxSurge (absolute number, not percent)
	//
	// case 1:
	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
	// * The new replica set pods crashloop and never become available.
	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
	// * The user notices the crashloop and does kubectl rollout undo to rollback.
	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
	//
	// case 2:
	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
	// allow the new replica set to be scaled up by 5.
	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
	newRSUnavailablePodCount := *(newRS.Spec.Replicas) - newRS.Status.AvailableReplicas
	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
	if maxScaledDown <= 0 {
		return false, nil
	}

	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
	// and cause timeout. See https://github.com/kubernetes/kubernetes/issues/16737
	oldRSs, cleanupCount, err := dc.cleanupUnhealthyReplicas(ctx, oldRSs, deployment, maxScaledDown)
	if err != nil {
		return false, nil
	}
	logger.V(4).Info("Cleaned up unhealthy replicas from old RSes", "count", cleanupCount)

	// Scale down old replica sets; we need to check maxUnavailable to ensure we can scale down.
	allRSs = append(oldRSs, newRS)
	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(ctx, allRSs, oldRSs, deployment)
	if err != nil {
		return false, nil
	}
	logger.V(4).Info("Scaled down old RSes", "deployment", klog.KObj(deployment), "count", scaledDownCount)

	totalScaledDown := cleanupCount + scaledDownCount
	return totalScaledDown > 0, nil
}
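
// As a side note on where maxUnavailable comes from: deploymentutil.MaxUnavailable
// resolves the strategy's intstr value against spec.replicas. A hedged sketch of the
// rounding behaviour (maxUnavailable rounds down, maxSurge rounds up, and when both
// resolve to zero, maxUnavailable is bumped to 1 so the rollout can make progress),
// using hypothetical values spec.replicas=10, maxUnavailable="25%", maxSurge="25%":
//
//	maxUnavailable := int32(10 * 25 / 100) // 2 (rounded down)
//	maxSurge := int32(3)                   // ceil(10 * 25 / 100) = 3 (rounded up)
//	minAvailable := int32(10) - maxUnavailable
//	_, _ = maxSurge, minAvailable          // minAvailable = 8, as in the example above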

// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
func (dc *DeploymentController) cleanupUnhealthyReplicas(ctx context.Context, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment, maxCleanupCount int32) ([]*apps.ReplicaSet, int32, error) {
	logger := klog.FromContext(ctx)
	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))
	// Safely scale down all old replica sets with unhealthy replicas. The replica set will sort the pods in the order
	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
	// be deleted first and won't increase unavailability.
	totalScaledDown := int32(0)
	for i, targetRS := range oldRSs {
		if totalScaledDown >= maxCleanupCount {
			break
		}
		if *(targetRS.Spec.Replicas) == 0 {
			// cannot scale down this replica set.
			continue
		}
		logger.V(4).Info("Found available pods in old RS", "replicaSet", klog.KObj(targetRS), "availableReplicas", targetRS.Status.AvailableReplicas)
		if *(targetRS.Spec.Replicas) == targetRS.Status.AvailableReplicas {
			// no unhealthy replicas found, no scaling required.
			continue
		}

		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(*(targetRS.Spec.Replicas)-targetRS.Status.AvailableReplicas)))
		newReplicasCount := *(targetRS.Spec.Replicas) - scaledDownCount
		if newReplicasCount > *(targetRS.Spec.Replicas) {
			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
		}
		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(ctx, targetRS, newReplicasCount, deployment)
		if err != nil {
			return nil, totalScaledDown, err
		}
		totalScaledDown += scaledDownCount
		oldRSs[i] = updatedOldRS
	}
	return oldRSs, totalScaledDown, nil
}
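
// A short worked example of the cleanup arithmetic above, with hypothetical numbers:
// if a target old RS has spec.replicas=8 but only 5 available replicas, and the
// remaining budget maxCleanupCount-totalScaledDown is 4, then
//
//	scaledDownCount := int32(integer.IntMin(4, 8-5)) // 3: only the unhealthy replicas
//	newReplicasCount := int32(8) - scaledDownCount   // 5: availability is untouched
//	_ = newReplicasCount
//
// so the RS is scaled to exactly its available count and no healthy pod is removed.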

// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
// We need to check maxUnavailable to ensure availability.
func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(ctx context.Context, allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment) (int32, error) {
	logger := klog.FromContext(ctx)
	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)

	// Check if we can scale down.
	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
	// Find the number of available pods.
	availablePodCount := deploymentutil.GetAvailableReplicaCountForReplicaSets(allRSs)
	if availablePodCount <= minAvailable {
		// Cannot scale down.
		return 0, nil
	}
	logger.V(4).Info("Found available pods in deployment, scaling down old RSes", "deployment", klog.KObj(deployment), "availableReplicas", availablePodCount)

	sort.Sort(controller.ReplicaSetsByCreationTimestamp(oldRSs))

	totalScaledDown := int32(0)
	totalScaleDownCount := availablePodCount - minAvailable
	for _, targetRS := range oldRSs {
		if totalScaledDown >= totalScaleDownCount {
			// No further scaling required.
			break
		}
		if *(targetRS.Spec.Replicas) == 0 {
			// cannot scale down this ReplicaSet.
			continue
		}
		// Scale down.
		scaleDownCount := int32(integer.IntMin(int(*(targetRS.Spec.Replicas)), int(totalScaleDownCount-totalScaledDown)))
		newReplicasCount := *(targetRS.Spec.Replicas) - scaleDownCount
		if newReplicasCount > *(targetRS.Spec.Replicas) {
			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
		}
		_, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, targetRS, newReplicasCount, deployment)
		if err != nil {
			return totalScaledDown, err
		}

		totalScaledDown += scaleDownCount
	}

	return totalScaledDown, nil
}
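
// To illustrate the scale-down budget above with hypothetical numbers: with
// spec.replicas=10 and maxUnavailable=2, minAvailable is 8. If 10 pods are currently
// available across all replica sets, then
//
//	totalScaleDownCount := int32(10) - int32(8) // 2 old pods may be removed this pass
//	_ = totalScaleDownCount
//
// and any further scale-down waits until replacement pods in the new RS become available.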