k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/daemon/update.go

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package daemon
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"fmt"
    23  	"reflect"
    24  	"sort"
    25  
    26  	"k8s.io/klog/v2"
    27  
    28  	apps "k8s.io/api/apps/v1"
    29  	v1 "k8s.io/api/core/v1"
    30  	"k8s.io/apimachinery/pkg/api/errors"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	"k8s.io/apimachinery/pkg/labels"
    33  	"k8s.io/apimachinery/pkg/runtime"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/apimachinery/pkg/util/json"
    36  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    37  	"k8s.io/kubernetes/pkg/controller"
    38  	"k8s.io/kubernetes/pkg/controller/daemon/util"
    39  	labelsutil "k8s.io/kubernetes/pkg/util/labels"
    40  )
    41  
    42  // rollingUpdate identifies the set of old pods to delete, or additional pods to create on nodes,
    43  // remaining within the constraints imposed by the update strategy.
    44  func (dsc *DaemonSetsController) rollingUpdate(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, hash string) error {
    45  	logger := klog.FromContext(ctx)
    46  	nodeToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds, false)
    47  	if err != nil {
    48  		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %v", ds.Name, err)
    49  	}
    50  	maxSurge, maxUnavailable, desiredNumberScheduled, err := dsc.updatedDesiredNodeCounts(ctx, ds, nodeList, nodeToDaemonPods)
    51  	if err != nil {
    52  		return fmt.Errorf("couldn't get unavailable numbers: %v", err)
    53  	}
    54  
    55  	now := dsc.failedPodsBackoff.Clock.Now()
    56  
    57  	// When not surging, we delete just enough pods to stay under the maxUnavailable limit, if any
    58  	// are necessary, and let the core loop create new instances on those nodes.
    59  	//
    60  	// Assumptions:
    61  	// * Expect manage loop to allow no more than one pod per node
    62  	// * Expect manage loop will create new pods
    63  	// * Expect manage loop will handle failed pods
    64  	// * Deleted pods do not count as unavailable so that updates make progress when nodes are down
    65  	// Invariants:
    66  	// * The number of new pods that are unavailable must be less than maxUnavailable
    67  	// * A node with an available old pod is a candidate for deletion if it does not violate other invariants
    68  	//
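         	// Worked example (illustrative numbers only): with 10 nodes that should run the
         	// daemon pod, maxUnavailable=2, and one updated pod that is not yet available,
         	// numUnavailable starts at 1, so at most one additional available old pod can be
         	// chosen from candidatePodsToDelete; old pods that are already unavailable are
         	// always allowed to be replaced, since deleting them cannot reduce availability.
         	//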
    69  	if maxSurge == 0 {
    70  		var numUnavailable int
    71  		var allowedReplacementPods []string
    72  		var candidatePodsToDelete []string
    73  		for nodeName, pods := range nodeToDaemonPods {
    74  			newPod, oldPod, ok := findUpdatedPodsOnNode(ds, pods, hash)
    75  			if !ok {
    76  				// let the manage loop clean up this node, and treat it as an unavailable node
    77  				logger.V(3).Info("DaemonSet has excess pods on node, skipping to allow the core loop to process", "daemonset", klog.KObj(ds), "node", klog.KRef("", nodeName))
    78  				numUnavailable++
    79  				continue
    80  			}
    81  			switch {
    82  			case oldPod == nil && newPod == nil, oldPod != nil && newPod != nil:
    83  				// the manage loop will handle creating or deleting the appropriate pod, consider this unavailable
    84  				numUnavailable++
    85  			case newPod != nil:
    86  				// this pod is up to date, check its availability
    87  				if !podutil.IsPodAvailable(newPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
    88  					// an unavailable new pod is counted against maxUnavailable
    89  					numUnavailable++
    90  				}
    91  			default:
    92  				// this pod is old, it is an update candidate
    93  				switch {
    94  				case !podutil.IsPodAvailable(oldPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}):
    95  					// the old pod isn't available, so it needs to be replaced
    96  					logger.V(5).Info("DaemonSet pod on node is out of date and not available, allowing replacement", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
    97  					// record the replacement
    98  					if allowedReplacementPods == nil {
    99  						allowedReplacementPods = make([]string, 0, len(nodeToDaemonPods))
   100  					}
   101  					allowedReplacementPods = append(allowedReplacementPods, oldPod.Name)
   102  					numUnavailable++
   103  				case numUnavailable >= maxUnavailable:
   104  					// no point considering any other candidates
   105  					continue
   106  				default:
   107  					logger.V(5).Info("DaemonSet pod on node is out of date, this is a candidate to replace", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
   108  					// record the candidate
   109  					if candidatePodsToDelete == nil {
   110  						candidatePodsToDelete = make([]string, 0, maxUnavailable)
   111  					}
   112  					candidatePodsToDelete = append(candidatePodsToDelete, oldPod.Name)
   113  				}
   114  			}
   115  		}
   116  
    117  		// use any of the candidates we can, including the allowedReplacementPods
   118  		logger.V(5).Info("DaemonSet allowing replacements", "daemonset", klog.KObj(ds), "replacements", len(allowedReplacementPods), "maxUnavailable", maxUnavailable, "numUnavailable", numUnavailable, "candidates", len(candidatePodsToDelete))
   119  		remainingUnavailable := maxUnavailable - numUnavailable
   120  		if remainingUnavailable < 0 {
   121  			remainingUnavailable = 0
   122  		}
   123  		if max := len(candidatePodsToDelete); remainingUnavailable > max {
   124  			remainingUnavailable = max
   125  		}
   126  		oldPodsToDelete := append(allowedReplacementPods, candidatePodsToDelete[:remainingUnavailable]...)
   127  
   128  		return dsc.syncNodes(ctx, ds, oldPodsToDelete, nil, hash)
   129  	}
   130  
   131  	// When surging, we create new pods whenever an old pod is unavailable, and we can create up
   132  	// to maxSurge extra pods
   133  	//
   134  	// Assumptions:
   135  	// * Expect manage loop to allow no more than two pods per node, one old, one new
   136  	// * Expect manage loop will create new pods if there are no pods on node
   137  	// * Expect manage loop will handle failed pods
   138  	// * Deleted pods do not count as unavailable so that updates make progress when nodes are down
   139  	// Invariants:
   140  	// * A node with an unavailable old pod is a candidate for immediate new pod creation
   141  	// * An old available pod is deleted if a new pod is available
   142  	// * No more than maxSurge new pods are created for old available pods at any one time
   143  	//
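         	// Worked example (illustrative numbers only): with maxSurge=2 and no nodes already
         	// surging, every node whose old pod is unavailable (and that still matches the
         	// scheduling constraints) gets a new pod immediately via allowedNewNodes, while at
         	// most two nodes whose old pods are still available are taken from candidateNewNodes;
         	// once a surged new pod becomes available, a later pass through this loop deletes the
         	// old pod on that node.
         	//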
    146  	var oldPodsToDelete []string          // pods that are already updated, or are unavailable on sunsetted nodes
   145  	var shouldNotRunPodsToDelete []string // candidate pods to be deleted on sunsetted nodes
   146  	var candidateNewNodes []string
   147  	var allowedNewNodes []string
   148  	var numSurge int
   149  	var numAvailable int
   150  
   151  	for nodeName, pods := range nodeToDaemonPods {
   152  		newPod, oldPod, ok := findUpdatedPodsOnNode(ds, pods, hash)
   153  		if !ok {
   154  			// let the manage loop clean up this node, and treat it as a surge node
   155  			logger.V(3).Info("DaemonSet has excess pods on node, skipping to allow the core loop to process", "daemonset", klog.KObj(ds), "node", klog.KRef("", nodeName))
   156  			numSurge++
   157  			continue
   158  		}
   159  
   160  		// first count availability for all the nodes (even the ones that we are sunsetting due to scheduling constraints)
   161  		if oldPod != nil {
   162  			if podutil.IsPodAvailable(oldPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
   163  				numAvailable++
   164  			}
   165  		} else if newPod != nil {
   166  			if podutil.IsPodAvailable(newPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
   167  				numAvailable++
   168  			}
   169  		}
   170  
   171  		switch {
   172  		case oldPod == nil:
   173  			// we don't need to do anything to this node, the manage loop will handle it
   174  		case newPod == nil:
   175  			// this is a surge candidate
   176  			switch {
   177  			case !podutil.IsPodAvailable(oldPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}):
   178  				node, err := dsc.nodeLister.Get(nodeName)
   179  				if err != nil {
   180  					return fmt.Errorf("couldn't get node for nodeName %q: %v", nodeName, err)
   181  				}
   182  				if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); !shouldRun {
   183  					logger.V(5).Info("DaemonSet pod on node is not available and does not match scheduling constraints, remove old pod", "daemonset", klog.KObj(ds), "node", nodeName, "oldPod", klog.KObj(oldPod))
   184  					oldPodsToDelete = append(oldPodsToDelete, oldPod.Name)
   185  					continue
   186  				}
   187  				// the old pod isn't available, allow it to become a replacement
   188  				logger.V(5).Info("Pod on node is out of date and not available, allowing replacement", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
   189  				// record the replacement
   190  				if allowedNewNodes == nil {
   191  					allowedNewNodes = make([]string, 0, len(nodeToDaemonPods))
   192  				}
   193  				allowedNewNodes = append(allowedNewNodes, nodeName)
   194  			default:
   195  				node, err := dsc.nodeLister.Get(nodeName)
   196  				if err != nil {
   197  					return fmt.Errorf("couldn't get node for nodeName %q: %v", nodeName, err)
   198  				}
   199  				if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); !shouldRun {
   200  					shouldNotRunPodsToDelete = append(shouldNotRunPodsToDelete, oldPod.Name)
   201  					continue
   202  				}
   203  				if numSurge >= maxSurge {
   204  					// no point considering any other candidates
   205  					continue
   206  				}
   207  				logger.V(5).Info("DaemonSet pod on node is out of date, this is a surge candidate", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
   208  				// record the candidate
   209  				if candidateNewNodes == nil {
   210  					candidateNewNodes = make([]string, 0, maxSurge)
   211  				}
   212  				candidateNewNodes = append(candidateNewNodes, nodeName)
   213  			}
   214  		default:
   215  			// we have already surged onto this node, determine our state
   216  			if !podutil.IsPodAvailable(newPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
   217  				// we're waiting to go available here
   218  				numSurge++
   219  				continue
   220  			}
   221  			// we're available, delete the old pod
   222  			logger.V(5).Info("DaemonSet pod on node is available, remove old pod", "daemonset", klog.KObj(ds), "newPod", klog.KObj(newPod), "node", nodeName, "oldPod", klog.KObj(oldPod))
   223  			oldPodsToDelete = append(oldPodsToDelete, oldPod.Name)
   224  		}
   225  	}
   226  
   227  	// use any of the candidates we can, including the allowedNewNodes
   228  	logger.V(5).Info("DaemonSet allowing replacements", "daemonset", klog.KObj(ds), "replacements", len(allowedNewNodes), "maxSurge", maxSurge, "numSurge", numSurge, "candidates", len(candidateNewNodes))
   229  	remainingSurge := maxSurge - numSurge
   230  
   231  	// With maxSurge, the application owner expects 100% availability.
    232  	// When the scheduling constraints change from node A to node B, we do not want the application to be left
    233  	// without any available pods. Only delete a pod on node A when a pod on node B becomes available.
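         	// For example (illustrative): if the constraints now select only node B,
         	// desiredNumberScheduled is 1; while only the old pod on node A is available,
         	// numAvailable is 1 and nothing is deleted here, but once the pod on node B
         	// becomes available numAvailable is 2, deletablePodsNumber is 1, and the pod
         	// on node A can be removed.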
   234  	if deletablePodsNumber := numAvailable - desiredNumberScheduled; deletablePodsNumber > 0 {
   235  		if shouldNotRunPodsToDeleteNumber := len(shouldNotRunPodsToDelete); deletablePodsNumber > shouldNotRunPodsToDeleteNumber {
   236  			deletablePodsNumber = shouldNotRunPodsToDeleteNumber
   237  		}
   238  		for _, podToDeleteName := range shouldNotRunPodsToDelete[:deletablePodsNumber] {
   239  			podToDelete, err := dsc.podLister.Pods(ds.Namespace).Get(podToDeleteName)
   240  			if err != nil {
   241  				if errors.IsNotFound(err) {
   242  					continue
   243  				}
   244  				return fmt.Errorf("couldn't get pod which should be deleted due to scheduling constraints %q: %v", podToDeleteName, err)
   245  			}
   246  			logger.V(5).Info("DaemonSet pod on node should be deleted due to scheduling constraints", "daemonset", klog.KObj(ds), "pod", klog.KObj(podToDelete), "node", podToDelete.Spec.NodeName)
   247  			oldPodsToDelete = append(oldPodsToDelete, podToDeleteName)
   248  		}
   249  	}
   250  
   251  	if remainingSurge < 0 {
   252  		remainingSurge = 0
   253  	}
   254  	if max := len(candidateNewNodes); remainingSurge > max {
   255  		remainingSurge = max
   256  	}
   257  	newNodesToCreate := append(allowedNewNodes, candidateNewNodes[:remainingSurge]...)
   258  
   259  	return dsc.syncNodes(ctx, ds, oldPodsToDelete, newNodesToCreate, hash)
   260  }
   261  
   262  // findUpdatedPodsOnNode looks at non-deleted pods on a given node and returns true if there
   263  // is at most one of each old and new pods, or false if there are multiples. We can skip
   264  // processing the particular node in those scenarios and let the manage loop prune the
   265  // excess pods for our next time around.
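         // For example (illustrative): a node holding one terminating old pod and one running
         // updated pod yields (newPod, nil, true), since deleted pods are skipped, while a node
         // holding two non-deleted old pods yields (nil, nil, false).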
   266  func findUpdatedPodsOnNode(ds *apps.DaemonSet, podsOnNode []*v1.Pod, hash string) (newPod, oldPod *v1.Pod, ok bool) {
   267  	for _, pod := range podsOnNode {
   268  		if pod.DeletionTimestamp != nil {
   269  			continue
   270  		}
   271  		generation, err := util.GetTemplateGeneration(ds)
   272  		if err != nil {
   273  			generation = nil
   274  		}
   275  		if util.IsPodUpdated(pod, hash, generation) {
   276  			if newPod != nil {
   277  				return nil, nil, false
   278  			}
   279  			newPod = pod
   280  		} else {
   281  			if oldPod != nil {
   282  				return nil, nil, false
   283  			}
   284  			oldPod = pod
   285  		}
   286  	}
   287  	return newPod, oldPod, true
   288  }
   289  
    290  // constructHistory finds all histories controlled by the given DaemonSet, and
    291  // updates the current history's revision number, or creates the current history if needed.
    292  // It also deduplicates the current history and adds missing unique labels to existing histories.
   293  func (dsc *DaemonSetsController) constructHistory(ctx context.Context, ds *apps.DaemonSet) (cur *apps.ControllerRevision, old []*apps.ControllerRevision, err error) {
   294  	var histories []*apps.ControllerRevision
   295  	var currentHistories []*apps.ControllerRevision
   296  	histories, err = dsc.controlledHistories(ctx, ds)
   297  	if err != nil {
   298  		return nil, nil, err
   299  	}
   300  	for _, history := range histories {
   301  		// Add the unique label if it's not already added to the history
   302  		// We use history name instead of computing hash, so that we don't need to worry about hash collision
   303  		if _, ok := history.Labels[apps.DefaultDaemonSetUniqueLabelKey]; !ok {
   304  			toUpdate := history.DeepCopy()
   305  			toUpdate.Labels[apps.DefaultDaemonSetUniqueLabelKey] = toUpdate.Name
   306  			history, err = dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Update(ctx, toUpdate, metav1.UpdateOptions{})
   307  			if err != nil {
   308  				return nil, nil, err
   309  			}
   310  		}
   311  		// Compare histories with ds to separate cur and old history
   312  		found := false
   313  		found, err = Match(ds, history)
   314  		if err != nil {
   315  			return nil, nil, err
   316  		}
   317  		if found {
   318  			currentHistories = append(currentHistories, history)
   319  		} else {
   320  			old = append(old, history)
   321  		}
   322  	}
   323  
   324  	currRevision := maxRevision(old) + 1
   325  	switch len(currentHistories) {
   326  	case 0:
   327  		// Create a new history if the current one isn't found
   328  		cur, err = dsc.snapshot(ctx, ds, currRevision)
   329  		if err != nil {
   330  			return nil, nil, err
   331  		}
   332  	default:
   333  		cur, err = dsc.dedupCurHistories(ctx, ds, currentHistories)
   334  		if err != nil {
   335  			return nil, nil, err
   336  		}
   337  		// Update revision number if necessary
   338  		if cur.Revision < currRevision {
   339  			toUpdate := cur.DeepCopy()
   340  			toUpdate.Revision = currRevision
   341  			_, err = dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Update(ctx, toUpdate, metav1.UpdateOptions{})
   342  			if err != nil {
   343  				return nil, nil, err
   344  			}
   345  		}
   346  	}
   347  	return cur, old, err
   348  }
   349  
   350  func (dsc *DaemonSetsController) cleanupHistory(ctx context.Context, ds *apps.DaemonSet, old []*apps.ControllerRevision) error {
   351  	// Include deleted terminal pods when maintaining history.
   352  	nodesToDaemonPods, err := dsc.getNodesToDaemonPods(ctx, ds, true)
   353  	if err != nil {
   354  		return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %v", ds.Name, err)
   355  	}
   356  
   357  	toKeep := int(*ds.Spec.RevisionHistoryLimit)
   358  	toKill := len(old) - toKeep
   359  	if toKill <= 0 {
   360  		return nil
   361  	}
   362  
   363  	// Find all hashes of live pods
   364  	liveHashes := make(map[string]bool)
   365  	for _, pods := range nodesToDaemonPods {
   366  		for _, pod := range pods {
   367  			if hash := pod.Labels[apps.DefaultDaemonSetUniqueLabelKey]; len(hash) > 0 {
   368  				liveHashes[hash] = true
   369  			}
   370  		}
   371  	}
   372  
    373  	// Clean up old histories from lowest to highest revision (from oldest to newest)
   374  	sort.Sort(historiesByRevision(old))
   375  	for _, history := range old {
   376  		if toKill <= 0 {
   377  			break
   378  		}
   379  		if hash := history.Labels[apps.DefaultDaemonSetUniqueLabelKey]; liveHashes[hash] {
   380  			continue
   381  		}
   382  		// Clean up
   383  		err := dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Delete(ctx, history.Name, metav1.DeleteOptions{})
   384  		if err != nil {
   385  			return err
   386  		}
   387  		toKill--
   388  	}
   389  	return nil
   390  }
   391  
   392  // maxRevision returns the max revision number of the given list of histories
   393  func maxRevision(histories []*apps.ControllerRevision) int64 {
   394  	max := int64(0)
   395  	for _, history := range histories {
   396  		if history.Revision > max {
   397  			max = history.Revision
   398  		}
   399  	}
   400  	return max
   401  }
   402  
   403  func (dsc *DaemonSetsController) dedupCurHistories(ctx context.Context, ds *apps.DaemonSet, curHistories []*apps.ControllerRevision) (*apps.ControllerRevision, error) {
   404  	if len(curHistories) == 1 {
   405  		return curHistories[0], nil
   406  	}
   407  	var maxRevision int64
   408  	var keepCur *apps.ControllerRevision
   409  	for _, cur := range curHistories {
   410  		if cur.Revision >= maxRevision {
   411  			keepCur = cur
   412  			maxRevision = cur.Revision
   413  		}
   414  	}
   415  	// Relabel pods before dedup
   416  	pods, err := dsc.getDaemonPods(ctx, ds)
   417  	if err != nil {
   418  		return nil, err
   419  	}
   420  	for _, pod := range pods {
   421  		if pod.Labels[apps.DefaultDaemonSetUniqueLabelKey] != keepCur.Labels[apps.DefaultDaemonSetUniqueLabelKey] {
   422  			patchRaw := map[string]interface{}{
   423  				"metadata": map[string]interface{}{
   424  					"labels": map[string]interface{}{
   425  						apps.DefaultDaemonSetUniqueLabelKey: keepCur.Labels[apps.DefaultDaemonSetUniqueLabelKey],
   426  					},
   427  				},
   428  			}
   429  			patchJSON, err := json.Marshal(patchRaw)
   430  			if err != nil {
   431  				return nil, err
   432  			}
   433  			_, err = dsc.kubeClient.CoreV1().Pods(ds.Namespace).Patch(ctx, pod.Name, types.MergePatchType, patchJSON, metav1.PatchOptions{})
   434  			if err != nil {
   435  				return nil, err
   436  			}
   437  		}
   438  	}
   439  	// Clean up duplicates
   440  	for _, cur := range curHistories {
   441  		if cur.Name == keepCur.Name {
   442  			continue
   443  		}
   444  		// Remove duplicates
   445  		err = dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Delete(ctx, cur.Name, metav1.DeleteOptions{})
   446  		if err != nil {
   447  			return nil, err
   448  		}
   449  	}
   450  	return keepCur, nil
   451  }
   452  
   453  // controlledHistories returns all ControllerRevisions controlled by the given DaemonSet.
   454  // This also reconciles ControllerRef by adopting/orphaning.
   455  // Note that returned histories are pointers to objects in the cache.
   456  // If you want to modify one, you need to deep-copy it first.
   457  func (dsc *DaemonSetsController) controlledHistories(ctx context.Context, ds *apps.DaemonSet) ([]*apps.ControllerRevision, error) {
   458  	selector, err := metav1.LabelSelectorAsSelector(ds.Spec.Selector)
   459  	if err != nil {
   460  		return nil, err
   461  	}
   462  
   463  	// List all histories to include those that don't match the selector anymore
   464  	// but have a ControllerRef pointing to the controller.
   465  	histories, err := dsc.historyLister.ControllerRevisions(ds.Namespace).List(labels.Everything())
   466  	if err != nil {
   467  		return nil, err
   468  	}
   469  	// If any adoptions are attempted, we should first recheck for deletion with
    470  	// an uncached quorum read sometime after listing ControllerRevisions (see #42639).
   471  	canAdoptFunc := controller.RecheckDeletionTimestamp(func(ctx context.Context) (metav1.Object, error) {
   472  		fresh, err := dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace).Get(ctx, ds.Name, metav1.GetOptions{})
   473  		if err != nil {
   474  			return nil, err
   475  		}
   476  		if fresh.UID != ds.UID {
   477  			return nil, fmt.Errorf("original DaemonSet %v/%v is gone: got uid %v, wanted %v", ds.Namespace, ds.Name, fresh.UID, ds.UID)
   478  		}
   479  		return fresh, nil
   480  	})
   481  	// Use ControllerRefManager to adopt/orphan as needed.
   482  	cm := controller.NewControllerRevisionControllerRefManager(dsc.crControl, ds, selector, controllerKind, canAdoptFunc)
   483  	return cm.ClaimControllerRevisions(ctx, histories)
   484  }
   485  
    486  // Match checks whether the given DaemonSet's template matches the template stored in the given history.
   487  func Match(ds *apps.DaemonSet, history *apps.ControllerRevision) (bool, error) {
   488  	patch, err := getPatch(ds)
   489  	if err != nil {
   490  		return false, err
   491  	}
   492  	return bytes.Equal(patch, history.Data.Raw), nil
   493  }
   494  
    495  // getPatch returns a strategic merge patch that can be applied to restore a DaemonSet to a
    496  // previous version. If the returned error is nil the patch is valid. The current state that we save is just the
    497  // PodTemplateSpec. We can modify this later to encompass more state (or less) and remain compatible with previously
   498  // recorded patches.
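         // For example (illustrative), for a DaemonSet whose pod template carries the label
         // app=ds, the resulting patch has roughly this shape (keys depend on the template):
         //
         //	{"spec":{"template":{"$patch":"replace","metadata":{"labels":{"app":"ds"}},"spec":{...}}}}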
   499  func getPatch(ds *apps.DaemonSet) ([]byte, error) {
   500  	dsBytes, err := json.Marshal(ds)
   501  	if err != nil {
   502  		return nil, err
   503  	}
   504  	var raw map[string]interface{}
   505  	err = json.Unmarshal(dsBytes, &raw)
   506  	if err != nil {
   507  		return nil, err
   508  	}
   509  	objCopy := make(map[string]interface{})
   510  	specCopy := make(map[string]interface{})
   511  
   512  	// Create a patch of the DaemonSet that replaces spec.template
   513  	spec := raw["spec"].(map[string]interface{})
   514  	template := spec["template"].(map[string]interface{})
   515  	specCopy["template"] = template
   516  	template["$patch"] = "replace"
   517  	objCopy["spec"] = specCopy
   518  	patch, err := json.Marshal(objCopy)
   519  	return patch, err
   520  }
   521  
   522  func (dsc *DaemonSetsController) snapshot(ctx context.Context, ds *apps.DaemonSet, revision int64) (*apps.ControllerRevision, error) {
   523  	patch, err := getPatch(ds)
   524  	if err != nil {
   525  		return nil, err
   526  	}
   527  	hash := controller.ComputeHash(&ds.Spec.Template, ds.Status.CollisionCount)
   528  	name := ds.Name + "-" + hash
   529  	history := &apps.ControllerRevision{
   530  		ObjectMeta: metav1.ObjectMeta{
   531  			Name:            name,
   532  			Namespace:       ds.Namespace,
   533  			Labels:          labelsutil.CloneAndAddLabel(ds.Spec.Template.Labels, apps.DefaultDaemonSetUniqueLabelKey, hash),
   534  			Annotations:     ds.Annotations,
   535  			OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(ds, controllerKind)},
   536  		},
   537  		Data:     runtime.RawExtension{Raw: patch},
   538  		Revision: revision,
   539  	}
   540  
   541  	history, err = dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Create(ctx, history, metav1.CreateOptions{})
   542  	if outerErr := err; errors.IsAlreadyExists(outerErr) {
   543  		logger := klog.FromContext(ctx)
   544  		// TODO: Is it okay to get from historyLister?
   545  		existedHistory, getErr := dsc.kubeClient.AppsV1().ControllerRevisions(ds.Namespace).Get(ctx, name, metav1.GetOptions{})
   546  		if getErr != nil {
   547  			return nil, getErr
   548  		}
   549  		// Check if we already created it
   550  		done, matchErr := Match(ds, existedHistory)
   551  		if matchErr != nil {
   552  			return nil, matchErr
   553  		}
   554  		if done {
   555  			return existedHistory, nil
   556  		}
   557  
    558  		// Handle name collisions between different histories
   559  		// Get the latest DaemonSet from the API server to make sure collision count is only increased when necessary
   560  		currDS, getErr := dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace).Get(ctx, ds.Name, metav1.GetOptions{})
   561  		if getErr != nil {
   562  			return nil, getErr
   563  		}
    564  		// If the collision count used to compute the hash was in fact stale, there's no need to bump the collision count; just retry
   565  		if !reflect.DeepEqual(currDS.Status.CollisionCount, ds.Status.CollisionCount) {
   566  			return nil, fmt.Errorf("found a stale collision count (%d, expected %d) of DaemonSet %q while processing; will retry until it is updated", ds.Status.CollisionCount, currDS.Status.CollisionCount, ds.Name)
   567  		}
   568  		if currDS.Status.CollisionCount == nil {
   569  			currDS.Status.CollisionCount = new(int32)
   570  		}
   571  		*currDS.Status.CollisionCount++
   572  		_, updateErr := dsc.kubeClient.AppsV1().DaemonSets(ds.Namespace).UpdateStatus(ctx, currDS, metav1.UpdateOptions{})
   573  		if updateErr != nil {
   574  			return nil, updateErr
   575  		}
   576  		logger.V(2).Info("Found a hash collision for DaemonSet - bumping collisionCount to resolve it", "daemonset", klog.KObj(ds), "collisionCount", *currDS.Status.CollisionCount)
   577  		return nil, outerErr
   578  	}
   579  	return history, err
   580  }
   581  
    582  // updatedDesiredNodeCounts calculates the true number of allowed surge, unavailable, and desired scheduled pods, and
    583  // updates the nodeToDaemonPods map to include an empty entry for every node that should run a daemon pod but has none scheduled.
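         // For example (illustrative numbers): with 10 nodes that should run the daemon pod and a
         // RollingUpdate strategy of maxSurge=0, maxUnavailable=20%, this returns maxSurge=0,
         // maxUnavailable=2, and desiredNumberScheduled=10; if both maxSurge and maxUnavailable
         // resolve to 0, maxUnavailable is forced to 1 below so the rollout can still make progress.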
   584  func (dsc *DaemonSetsController) updatedDesiredNodeCounts(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) (int, int, int, error) {
   585  	var desiredNumberScheduled int
   586  	logger := klog.FromContext(ctx)
   587  	for i := range nodeList {
   588  		node := nodeList[i]
   589  		wantToRun, _ := NodeShouldRunDaemonPod(node, ds)
   590  		if !wantToRun {
   591  			continue
   592  		}
   593  		desiredNumberScheduled++
   594  
   595  		if _, exists := nodeToDaemonPods[node.Name]; !exists {
   596  			nodeToDaemonPods[node.Name] = nil
   597  		}
   598  	}
   599  
   600  	maxUnavailable, err := util.UnavailableCount(ds, desiredNumberScheduled)
   601  	if err != nil {
   602  		return -1, -1, -1, fmt.Errorf("invalid value for MaxUnavailable: %v", err)
   603  	}
   604  
   605  	maxSurge, err := util.SurgeCount(ds, desiredNumberScheduled)
   606  	if err != nil {
   607  		return -1, -1, -1, fmt.Errorf("invalid value for MaxSurge: %v", err)
   608  	}
   609  
   610  	// if the daemonset returned with an impossible configuration, obey the default of unavailable=1 (in the
   611  	// event the apiserver returns 0 for both surge and unavailability)
   612  	if desiredNumberScheduled > 0 && maxUnavailable == 0 && maxSurge == 0 {
   613  		logger.Info("DaemonSet is not configured for surge or unavailability, defaulting to accepting unavailability", "daemonset", klog.KObj(ds))
   614  		maxUnavailable = 1
   615  	}
   616  	logger.V(5).Info("DaemonSet with maxSurge and maxUnavailable", "daemonset", klog.KObj(ds), "maxSurge", maxSurge, "maxUnavailable", maxUnavailable)
   617  	return maxSurge, maxUnavailable, desiredNumberScheduled, nil
   618  }
   619  
   620  type historiesByRevision []*apps.ControllerRevision
   621  
   622  func (h historiesByRevision) Len() int      { return len(h) }
   623  func (h historiesByRevision) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
   624  func (h historiesByRevision) Less(i, j int) bool {
   625  	return h[i].Revision < h[j].Revision
   626  }