k8s.io/kubernetes@v1.29.3/pkg/controller/daemon/util/daemonset_util.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package util

import (
	"fmt"
	"strconv"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	extensions "k8s.io/api/extensions/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	intstrutil "k8s.io/apimachinery/pkg/util/intstr"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)

// GetTemplateGeneration gets the template generation associated with a v1.DaemonSet by extracting it from the
// deprecated annotation. If no annotation is found, nil is returned. If the annotation is found but fails to
// parse, nil is returned along with an error. If the generation can be parsed from the annotation, a pointer
// to the parsed int64 value is returned.
func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
	annotation, found := ds.Annotations[apps.DeprecatedTemplateGeneration]
	if !found {
		return nil, nil
	}
	generation, err := strconv.ParseInt(annotation, 10, 64)
	if err != nil {
		return nil, err
	}
	return &generation, nil
}
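// exampleGetTemplateGeneration is an illustrative sketch, not part of the
// upstream file: it shows how a caller might read the deprecated template
// generation. The DaemonSet value (and its "7" annotation) is hypothetical.
func exampleGetTemplateGeneration() {
	ds := &apps.DaemonSet{}
	ds.Annotations = map[string]string{apps.DeprecatedTemplateGeneration: "7"}

	generation, err := GetTemplateGeneration(ds)
	if err != nil {
		fmt.Printf("annotation did not parse as an int64: %v\n", err)
		return
	}
	if generation != nil {
		fmt.Printf("template generation: %d\n", *generation) // prints 7
	}
}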
// AddOrUpdateDaemonPodTolerations applies the necessary tolerations to DaemonSet Pods, e.g. node.kubernetes.io/not-ready:NoExecute.
func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec) {
	// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
	// Add infinite toleration for taint notReady:NoExecute here
	// to survive taint-based eviction enforced by NodeController
	// when node turns not ready.
	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodeNotReady,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoExecute,
	})

	// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
	// Add infinite toleration for taint unreachable:NoExecute here
	// to survive taint-based eviction enforced by NodeController
	// when node turns unreachable.
	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodeUnreachable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoExecute,
	})

	// Per the TaintNodesByCondition feature, all DaemonSet pods should tolerate the
	// MemoryPressure, DiskPressure, PIDPressure, Unschedulable and NetworkUnavailable taints.
	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodeDiskPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodeMemoryPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodePIDPressure,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
		Key:      v1.TaintNodeUnschedulable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	})

	if spec.HostNetwork {
		v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
			Key:      v1.TaintNodeNetworkUnavailable,
			Operator: v1.TolerationOpExists,
			Effect:   v1.TaintEffectNoSchedule,
		})
	}
}

// CreatePodTemplate returns a copy of the provided template with a label carrying the
// templateGeneration (for backward compatibility) and a label carrying the hash of the
// provided template added, and with the default daemon tolerations set.
func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
	newTemplate := *template.DeepCopy()

	AddOrUpdateDaemonPodTolerations(&newTemplate.Spec)

	if newTemplate.ObjectMeta.Labels == nil {
		newTemplate.ObjectMeta.Labels = make(map[string]string)
	}
	if generation != nil {
		newTemplate.ObjectMeta.Labels[extensions.DaemonSetTemplateGenerationKey] = fmt.Sprint(*generation)
	}
	// TODO: do we need to validate if the DaemonSet is RollingUpdate or not?
	if len(hash) > 0 {
		newTemplate.ObjectMeta.Labels[extensions.DefaultDaemonSetUniqueLabelKey] = hash
	}
	return newTemplate
}

// AllowsSurge returns true if the daemonset allows more than a single pod on any node.
func AllowsSurge(ds *apps.DaemonSet) bool {
	maxSurge, err := SurgeCount(ds, 1)
	return err == nil && maxSurge > 0
}

// SurgeCount returns 0 if surge is not requested, the expected surge number to allow
// out of numberToSchedule if surge is configured, or an error if the surge percentage
// requested is invalid.
func SurgeCount(ds *apps.DaemonSet, numberToSchedule int) (int, error) {
	if ds.Spec.UpdateStrategy.Type != apps.RollingUpdateDaemonSetStrategyType {
		return 0, nil
	}

	r := ds.Spec.UpdateStrategy.RollingUpdate
	if r == nil {
		return 0, nil
	}
	// If surge is not requested, we should default to 0.
	if r.MaxSurge == nil {
		return 0, nil
	}
	return intstrutil.GetScaledValueFromIntOrPercent(r.MaxSurge, numberToSchedule, true)
}
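// exampleSurgeCount is an illustrative sketch, not part of the upstream file:
// it shows how a percentage MaxSurge is scaled against the number of nodes to
// schedule, rounding up. The DaemonSet spec below is hypothetical.
func exampleSurgeCount() {
	maxSurge := intstrutil.FromString("25%")
	ds := &apps.DaemonSet{
		Spec: apps.DaemonSetSpec{
			UpdateStrategy: apps.DaemonSetUpdateStrategy{
				Type: apps.RollingUpdateDaemonSetStrategyType,
				RollingUpdate: &apps.RollingUpdateDaemonSet{
					MaxSurge: &maxSurge,
				},
			},
		},
	}

	surge, err := SurgeCount(ds, 10) // 25% of 10 nodes, rounded up
	fmt.Println(surge, err)          // 3 <nil>
	fmt.Println(AllowsSurge(ds))     // true: 25% of 1 rounds up to 1
}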
// UnavailableCount returns 0 if unavailability is not requested, the expected
// unavailability number to allow out of numberToSchedule if requested, or an error if
// the unavailability percentage requested is invalid.
func UnavailableCount(ds *apps.DaemonSet, numberToSchedule int) (int, error) {
	if ds.Spec.UpdateStrategy.Type != apps.RollingUpdateDaemonSetStrategyType {
		return 0, nil
	}
	r := ds.Spec.UpdateStrategy.RollingUpdate
	if r == nil {
		return 0, nil
	}
	return intstrutil.GetScaledValueFromIntOrPercent(r.MaxUnavailable, numberToSchedule, true)
}

// IsPodUpdated checks if the pod carries a label value that matches either the DaemonSet's
// templateGeneration or its hash.
func IsPodUpdated(pod *v1.Pod, hash string, dsTemplateGeneration *int64) bool {
	// Compare with hash to see if the pod is updated, need to maintain backward compatibility of templateGeneration
	templateMatches := dsTemplateGeneration != nil &&
		pod.Labels[extensions.DaemonSetTemplateGenerationKey] == fmt.Sprint(*dsTemplateGeneration)
	hashMatches := len(hash) > 0 && pod.Labels[extensions.DefaultDaemonSetUniqueLabelKey] == hash
	return hashMatches || templateMatches
}

// ReplaceDaemonSetPodNodeNameNodeAffinity replaces the RequiredDuringSchedulingIgnoredDuringExecution
// NodeAffinity of the given affinity with a new NodeAffinity that selects the given nodeName.
// Note that this function assumes that no NodeAffinity conflicts with the selected nodeName.
func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *v1.Affinity, nodename string) *v1.Affinity {
	nodeSelReq := v1.NodeSelectorRequirement{
		Key:      metav1.ObjectNameField,
		Operator: v1.NodeSelectorOpIn,
		Values:   []string{nodename},
	}

	nodeSelector := &v1.NodeSelector{
		NodeSelectorTerms: []v1.NodeSelectorTerm{
			{
				MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
			},
		},
	}

	if affinity == nil {
		return &v1.Affinity{
			NodeAffinity: &v1.NodeAffinity{
				RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
			},
		}
	}

	if affinity.NodeAffinity == nil {
		affinity.NodeAffinity = &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: nodeSelector,
		}
		return affinity
	}

	nodeAffinity := affinity.NodeAffinity

	if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
		nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector
		return affinity
	}

	// Replace node selector with the new one.
	nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms = []v1.NodeSelectorTerm{
		{
			MatchFields: []v1.NodeSelectorRequirement{nodeSelReq},
		},
	}

	return affinity
}
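// exampleReplaceNodeNameAffinity is an illustrative sketch, not part of the
// upstream file: starting from a nil affinity, the helper pins the pod to a
// single node via a matchFields requirement on metadata.name. The node name
// "node-a" is hypothetical.
func exampleReplaceNodeNameAffinity() {
	affinity := ReplaceDaemonSetPodNodeNameNodeAffinity(nil, "node-a")

	term := affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms[0]
	fmt.Println(term.MatchFields[0].Key)    // metadata.name
	fmt.Println(term.MatchFields[0].Values) // [node-a]
}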
// GetTargetNodeName gets the target node name of a DaemonSet pod. If `.spec.NodeName` is not empty,
// it is returned; otherwise, the node name of the pending pod is retrieved from its NodeAffinity.
// An error is returned if the node name cannot be determined from either `.spec.NodeName` or NodeAffinity.
func GetTargetNodeName(pod *v1.Pod) (string, error) {
	if len(pod.Spec.NodeName) != 0 {
		return pod.Spec.NodeName, nil
	}

	// Retrieve node name of unscheduled pods from NodeAffinity
	if pod.Spec.Affinity == nil ||
		pod.Spec.Affinity.NodeAffinity == nil ||
		pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
		return "", fmt.Errorf("no spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution for pod %s/%s",
			pod.Namespace, pod.Name)
	}

	terms := pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
	if len(terms) < 1 {
		return "", fmt.Errorf("no nodeSelectorTerms in requiredDuringSchedulingIgnoredDuringExecution of pod %s/%s",
			pod.Namespace, pod.Name)
	}

	for _, term := range terms {
		for _, exp := range term.MatchFields {
			if exp.Key == metav1.ObjectNameField &&
				exp.Operator == v1.NodeSelectorOpIn {
				if len(exp.Values) != 1 {
					return "", fmt.Errorf("the matchFields value of '%s' is not unique for pod %s/%s",
						metav1.ObjectNameField, pod.Namespace, pod.Name)
				}

				return exp.Values[0], nil
			}
		}
	}

	return "", fmt.Errorf("no node name found for pod %s/%s", pod.Namespace, pod.Name)
}
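// exampleGetTargetNodeName is an illustrative sketch, not part of the upstream
// file: it exercises both paths through GetTargetNodeName. A scheduled pod
// resolves via spec.nodeName; an unscheduled DaemonSet pod resolves via the
// affinity produced by ReplaceDaemonSetPodNodeNameNodeAffinity above. The pod
// values are hypothetical.
func exampleGetTargetNodeName() {
	scheduled := &v1.Pod{Spec: v1.PodSpec{NodeName: "node-a"}}
	name, _ := GetTargetNodeName(scheduled)
	fmt.Println(name) // node-a

	pending := &v1.Pod{Spec: v1.PodSpec{
		Affinity: ReplaceDaemonSetPodNodeNameNodeAffinity(nil, "node-b"),
	}}
	name, _ = GetTargetNodeName(pending)
	fmt.Println(name) // node-b
}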