k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/nodestatus/setters.go

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package nodestatus
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math"
    23  	"net"
    24  	goruntime "runtime"
    25  	"strings"
    26  	"time"
    27  
    28  	cadvisorapiv1 "github.com/google/cadvisor/info/v1"
    29  
    30  	v1 "k8s.io/api/core/v1"
    31  	"k8s.io/apimachinery/pkg/api/resource"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/util/errors"
    34  	utilnet "k8s.io/apimachinery/pkg/util/net"
    35  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    36  	cloudprovider "k8s.io/cloud-provider"
    37  	cloudproviderapi "k8s.io/cloud-provider/api"
    38  	cloudprovidernodeutil "k8s.io/cloud-provider/node/helpers"
    39  	"k8s.io/component-base/version"
    40  	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
    41  	"k8s.io/kubernetes/pkg/features"
    42  	"k8s.io/kubernetes/pkg/kubelet/cadvisor"
    43  	"k8s.io/kubernetes/pkg/kubelet/cm"
    44  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    45  	"k8s.io/kubernetes/pkg/kubelet/events"
    46  	netutils "k8s.io/utils/net"
    47  
    48  	"k8s.io/klog/v2"
    49  )
    50  
    51  const (
    52  	// MaxNamesPerImageInNodeStatus is the maximum number of names
    53  	// per image stored in the node status.
    54  	MaxNamesPerImageInNodeStatus = 5
    55  )
    56  
    57  // Setter modifies the node in-place, and returns an error if the modification failed.
    58  // Setters may partially mutate the node before returning an error.
    59  type Setter func(ctx context.Context, node *v1.Node) error
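
// applySettersExample is an illustrative sketch (the helper name and wiring are
// hypothetical, not part of the kubelet): it shows how a caller might apply a
// list of Setters to a node in order, collecting errors instead of stopping at
// the first failure, which mirrors how each Setter is written to be independent.
func applySettersExample(ctx context.Context, node *v1.Node, setters []Setter) error {
	var errs []error
	for _, setter := range setters {
		// Each Setter mutates the node in place and may return an error.
		if err := setter(ctx, node); err != nil {
			errs = append(errs, err)
		}
	}
	// Aggregate with the same helper this file already uses in ReadyCondition.
	return errors.NewAggregate(errs)
}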
    60  
    61  // NodeAddress returns a Setter that updates address-related information on the node.
    62  func NodeAddress(nodeIPs []net.IP, // typically Kubelet.nodeIPs
    63  	validateNodeIPFunc func(net.IP) error, // typically Kubelet.nodeIPValidator
    64  	hostname string, // typically Kubelet.hostname
    65  	hostnameOverridden bool, // was the hostname force set?
    66  	externalCloudProvider bool, // typically Kubelet.externalCloudProvider
    67  	cloud cloudprovider.Interface, // typically Kubelet.cloud
    68  	nodeAddressesFunc func() ([]v1.NodeAddress, error), // typically Kubelet.cloudResourceSyncManager.NodeAddresses
    69  ) Setter {
    70  	var nodeIP, secondaryNodeIP net.IP
    71  	if len(nodeIPs) > 0 {
    72  		nodeIP = nodeIPs[0]
    73  	}
    74  	preferIPv4 := nodeIP == nil || nodeIP.To4() != nil
    75  	isPreferredIPFamily := func(ip net.IP) bool { return (ip.To4() != nil) == preferIPv4 }
    76  	nodeIPSpecified := nodeIP != nil && !nodeIP.IsUnspecified()
    77  
    78  	if len(nodeIPs) > 1 {
    79  		secondaryNodeIP = nodeIPs[1]
    80  	}
    81  	secondaryNodeIPSpecified := secondaryNodeIP != nil && !secondaryNodeIP.IsUnspecified()
    82  
    83  	return func(ctx context.Context, node *v1.Node) error {
    84  		if nodeIPSpecified {
    85  			if err := validateNodeIPFunc(nodeIP); err != nil {
    86  				return fmt.Errorf("failed to validate nodeIP: %v", err)
    87  			}
    88  			klog.V(4).InfoS("Using node IP", "IP", nodeIP.String())
    89  		}
    90  		if secondaryNodeIPSpecified {
    91  			if err := validateNodeIPFunc(secondaryNodeIP); err != nil {
    92  				return fmt.Errorf("failed to validate secondaryNodeIP: %v", err)
    93  			}
    94  			klog.V(4).InfoS("Using secondary node IP", "IP", secondaryNodeIP.String())
    95  		}
    96  
    97  		if (externalCloudProvider || cloud != nil) && nodeIPSpecified {
    98  			// Annotate the Node object with nodeIP for external cloud provider.
    99  			//
   100  			// We do this even when external CCM is not configured to cover a situation
   101  			// during migration from legacy to external CCM: when CCM is running the
   102  			// node controller in the cluster but kubelet is still running the in-tree
   103  			// provider. Adding this annotation in all cases ensures that while
   104  			// Addresses flap between the competing controllers, they at least flap
   105  			// consistently.
   106  			//
   107  			// We do not add the annotation in the case where there is no cloud
   108  			// controller at all, as we don't expect to migrate these clusters to use an
   109  			// external CCM.
   110  			if node.ObjectMeta.Annotations == nil {
   111  				node.ObjectMeta.Annotations = make(map[string]string)
   112  			}
   113  			annotation := nodeIP.String()
   114  			if secondaryNodeIPSpecified {
   115  				annotation += "," + secondaryNodeIP.String()
   116  			}
   117  			node.ObjectMeta.Annotations[cloudproviderapi.AnnotationAlphaProvidedIPAddr] = annotation
   118  		} else if node.ObjectMeta.Annotations != nil {
   119  			// Clean up stale annotations if no longer using a cloud provider or
   120  			// no longer overriding node IP.
   121  			delete(node.ObjectMeta.Annotations, cloudproviderapi.AnnotationAlphaProvidedIPAddr)
   122  		}
   123  
   124  		if externalCloudProvider {
   125  			// If --cloud-provider=external and the node addresses are already set,
   126  			// then we return early because provider-set addresses should take precedence.
   127  			// Otherwise, we try to use the node IP defined via flags and let the cloud provider override it later.
   128  			// This should alleviate a lot of the bootstrapping issues with out-of-tree providers.
   129  			if len(node.Status.Addresses) > 0 {
   130  				return nil
   131  			}
   132  			// If no node IPs are specified, wait for the external cloud provider to set the node addresses.
   133  			// Otherwise, use them on the assumption that the installer/administrator has the prior knowledge
   134  			// required to ensure the external cloud provider will use the same addresses, avoiding the issues explained
   135  			// in https://github.com/kubernetes/kubernetes/issues/120720.
   136  			// We already hint the external cloud provider via the AnnotationAlphaProvidedIPAddr annotation.
   137  			if !nodeIPSpecified {
   138  				node.Status.Addresses = []v1.NodeAddress{
   139  					{Type: v1.NodeHostName, Address: hostname},
   140  				}
   141  				return nil
   142  			}
   143  		}
   144  		if cloud != nil {
   145  			cloudNodeAddresses, err := nodeAddressesFunc()
   146  			if err != nil {
   147  				return err
   148  			}
   149  
   150  			nodeAddresses, err := cloudprovidernodeutil.GetNodeAddressesFromNodeIPLegacy(nodeIP, cloudNodeAddresses)
   151  			if err != nil {
   152  				return err
   153  			}
   154  
   155  			switch {
   156  			case len(cloudNodeAddresses) == 0:
   157  				// the cloud provider didn't specify any addresses
   158  				nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
   159  
   160  			case !hasAddressType(cloudNodeAddresses, v1.NodeHostName) && hasAddressValue(cloudNodeAddresses, hostname):
   161  				// the cloud provider didn't specify an address of type Hostname,
   162  				// but the auto-detected hostname matched an address reported by the cloud provider,
   163  				// so we can add it and count on the value being verifiable via cloud provider metadata
   164  				nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
   165  
   166  			case hostnameOverridden:
   167  				// the hostname was force-set via flag/config.
   168  				// this means the hostname might not be verifiable via cloud provider metadata,
   169  				// but it was a deliberate choice by the kubelet deployer that we should honor
   170  				var existingHostnameAddress *v1.NodeAddress
   171  				for i := range nodeAddresses {
   172  					if nodeAddresses[i].Type == v1.NodeHostName {
   173  						existingHostnameAddress = &nodeAddresses[i]
   174  						break
   175  					}
   176  				}
   177  
   178  				if existingHostnameAddress == nil {
   179  					// no existing Hostname address found, add it
   180  					klog.InfoS("Adding overridden hostname to cloudprovider-reported addresses", "hostname", hostname)
   181  					nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeHostName, Address: hostname})
   182  				} else if existingHostnameAddress.Address != hostname {
   183  					// override the Hostname address reported by the cloud provider
   184  					klog.InfoS("Replacing cloudprovider-reported hostname with overridden hostname", "cloudProviderHostname", existingHostnameAddress.Address, "overriddenHostname", hostname)
   185  					existingHostnameAddress.Address = hostname
   186  				}
   187  			}
   188  			node.Status.Addresses = nodeAddresses
   189  		} else if nodeIPSpecified && secondaryNodeIPSpecified {
   190  			node.Status.Addresses = []v1.NodeAddress{
   191  				{Type: v1.NodeInternalIP, Address: nodeIP.String()},
   192  				{Type: v1.NodeInternalIP, Address: secondaryNodeIP.String()},
   193  				{Type: v1.NodeHostName, Address: hostname},
   194  			}
   195  		} else {
   196  			var ipAddr net.IP
   197  			var err error
   198  
   199  			// 1) Use nodeIP if set (and not "0.0.0.0"/"::")
   200  			// 2) If the user has specified an IP to HostnameOverride, use it
   201  			// 3) Lookup the IP from node name by DNS
   202  			// 4) Try to get the IP from the network interface used as default gateway
   203  			//
   204  			// For steps 3 and 4, IPv4 addresses are preferred to IPv6 addresses
   205  			// unless nodeIP is "::", in which case it is reversed.
   206  			if nodeIPSpecified {
   207  				ipAddr = nodeIP
   208  			} else if addr := netutils.ParseIPSloppy(hostname); addr != nil {
   209  				ipAddr = addr
   210  			} else {
   211  				var addrs []net.IP
   212  				addrs, _ = net.LookupIP(node.Name)
   213  				for _, addr := range addrs {
   214  					if err = validateNodeIPFunc(addr); err == nil {
   215  						if isPreferredIPFamily(addr) {
   216  							ipAddr = addr
   217  							break
   218  						} else if ipAddr == nil {
   219  							ipAddr = addr
   220  						}
   221  					}
   222  				}
   223  
   224  				if ipAddr == nil {
   225  					ipAddr, err = utilnet.ResolveBindAddress(nodeIP)
   226  				}
   227  			}
   228  
   229  			if ipAddr == nil {
   230  				// We tried everything we could, but the IP address wasn't fetchable; error out
   231  				return fmt.Errorf("can't get ip address of node %s. error: %v", node.Name, err)
   232  			}
   233  			node.Status.Addresses = []v1.NodeAddress{
   234  				{Type: v1.NodeInternalIP, Address: ipAddr.String()},
   235  				{Type: v1.NodeHostName, Address: hostname},
   236  			}
   237  		}
   238  		return nil
   239  	}
   240  }
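
// exampleNodeAddressBareMetal is an illustrative sketch (hypothetical helper,
// IP, and hostname): on a node with no cloud provider and a single node IP,
// the Setter returned by NodeAddress takes the final branch above and reports
// one InternalIP address plus the hostname.
func exampleNodeAddressBareMetal(ctx context.Context, node *v1.Node) error {
	setter := NodeAddress(
		[]net.IP{netutils.ParseIPSloppy("192.0.2.10")}, // hypothetical --node-ip
		func(ip net.IP) error { return nil },           // accept any IP for the example
		"node-1", // hostname
		false,    // hostname was not overridden
		false,    // not running with --cloud-provider=external
		nil,      // no in-tree cloud provider
		nil,      // cloud address source is never called when cloud is nil
	)
	// Expected result: Addresses = [{InternalIP 192.0.2.10}, {Hostname node-1}].
	return setter(ctx, node)
}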
   241  
   242  func hasAddressType(addresses []v1.NodeAddress, addressType v1.NodeAddressType) bool {
   243  	for _, address := range addresses {
   244  		if address.Type == addressType {
   245  			return true
   246  		}
   247  	}
   248  	return false
   249  }
   250  func hasAddressValue(addresses []v1.NodeAddress, addressValue string) bool {
   251  	for _, address := range addresses {
   252  		if address.Address == addressValue {
   253  			return true
   254  		}
   255  	}
   256  	return false
   257  }
   258  
   259  // MachineInfo returns a Setter that updates machine-related information on the node.
   260  func MachineInfo(nodeName string,
   261  	maxPods int,
   262  	podsPerCore int,
   263  	machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo
   264  	capacityFunc func(localStorageCapacityIsolation bool) v1.ResourceList, // typically Kubelet.containerManager.GetCapacity
   265  	devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity
   266  	nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation
   267  	recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent
   268  	localStorageCapacityIsolation bool,
   269  ) Setter {
   270  	return func(ctx context.Context, node *v1.Node) error {
   271  		// Note: avoid blindly overwriting the capacity in case opaque
   272  		//       resources are being advertised.
   273  		if node.Status.Capacity == nil {
   274  			node.Status.Capacity = v1.ResourceList{}
   275  		}
   276  
   277  		var devicePluginAllocatable v1.ResourceList
   278  		var devicePluginCapacity v1.ResourceList
   279  		var removedDevicePlugins []string
   280  
   281  		// TODO: Post NotReady if we cannot get MachineInfo from cAdvisor. This needs to start
   282  		// cAdvisor locally, e.g. for test-cmd.sh, and in integration tests.
   283  		info, err := machineInfoFunc()
   284  		if err != nil {
   285  			// TODO(roberthbailey): This is required for test-cmd.sh to pass.
   286  			// See if the test should be updated instead.
   287  			node.Status.Capacity[v1.ResourceCPU] = *resource.NewMilliQuantity(0, resource.DecimalSI)
   288  			node.Status.Capacity[v1.ResourceMemory] = resource.MustParse("0Gi")
   289  			node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(int64(maxPods), resource.DecimalSI)
   290  			klog.ErrorS(err, "Error getting machine info")
   291  		} else {
   292  			node.Status.NodeInfo.MachineID = info.MachineID
   293  			node.Status.NodeInfo.SystemUUID = info.SystemUUID
   294  
   295  			for rName, rCap := range cadvisor.CapacityFromMachineInfo(info) {
   296  				node.Status.Capacity[rName] = rCap
   297  			}
   298  
   299  			if podsPerCore > 0 {
   300  				node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
   301  					int64(math.Min(float64(info.NumCores*podsPerCore), float64(maxPods))), resource.DecimalSI)
   302  			} else {
   303  				node.Status.Capacity[v1.ResourcePods] = *resource.NewQuantity(
   304  					int64(maxPods), resource.DecimalSI)
   305  			}
   306  
   307  			if node.Status.NodeInfo.BootID != "" &&
   308  				node.Status.NodeInfo.BootID != info.BootID {
   309  				// TODO: This requires a transaction: either both the node status update
   310  				// and the event recording happen, or neither does; see issue #6055.
   311  				recordEventFunc(v1.EventTypeWarning, events.NodeRebooted,
   312  					fmt.Sprintf("Node %s has been rebooted, boot id: %s", nodeName, info.BootID))
   313  			}
   314  			node.Status.NodeInfo.BootID = info.BootID
   315  
   316  			// TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the
   317  			// capacity for every node status request
   318  			initialCapacity := capacityFunc(localStorageCapacityIsolation)
   319  			if initialCapacity != nil {
   320  				if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists {
   321  					node.Status.Capacity[v1.ResourceEphemeralStorage] = v
   322  				}
   323  			}
   325  
   326  			devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc()
   327  			for k, v := range devicePluginCapacity {
   328  				if old, ok := node.Status.Capacity[k]; !ok || old.Value() != v.Value() {
   329  					klog.V(2).InfoS("Updated capacity for device plugin", "plugin", k, "capacity", v.Value())
   330  				}
   331  				node.Status.Capacity[k] = v
   332  			}
   333  
   334  			for _, removedResource := range removedDevicePlugins {
   335  				klog.V(2).InfoS("Set capacity for removed resource to 0 on device removal", "device", removedResource)
   336  				// Set the capacity of the removed resource to 0 instead of
   337  				// removing the resource from the node status. This is to indicate
   338  				// that the resource is managed by a device plugin and had been
   339  				// registered before.
   340  				//
   341  				// This is required to differentiate device-plugin-managed
   342  				// resources from cluster-level resources, which are absent from
   343  				// the node status.
   344  				node.Status.Capacity[v1.ResourceName(removedResource)] = *resource.NewQuantity(int64(0), resource.DecimalSI)
   345  			}
   346  		}
   347  
   348  		// Set Allocatable.
   349  		if node.Status.Allocatable == nil {
   350  			node.Status.Allocatable = make(v1.ResourceList)
   351  		}
   352  		// Remove extended resources from allocatable that are no longer
   353  		// present in capacity.
   354  		for k := range node.Status.Allocatable {
   355  			_, found := node.Status.Capacity[k]
   356  			if !found && v1helper.IsExtendedResourceName(k) {
   357  				delete(node.Status.Allocatable, k)
   358  			}
   359  		}
   360  		allocatableReservation := nodeAllocatableReservationFunc()
   361  		for k, v := range node.Status.Capacity {
   362  			value := v.DeepCopy()
   363  			if res, exists := allocatableReservation[k]; exists {
   364  				value.Sub(res)
   365  			}
   366  			if value.Sign() < 0 {
   367  				// Negative Allocatable resources don't make sense.
   368  				value.Set(0)
   369  			}
   370  			node.Status.Allocatable[k] = value
   371  		}
   372  
   373  		for k, v := range devicePluginAllocatable {
   374  			if old, ok := node.Status.Allocatable[k]; !ok || old.Value() != v.Value() {
   375  				klog.V(2).InfoS("Updated allocatable", "device", k, "allocatable", v.Value())
   376  			}
   377  			node.Status.Allocatable[k] = v
   378  		}
   379  		// For every huge page reservation, we need to subtract it from allocatable memory.
   380  		for k, v := range node.Status.Capacity {
   381  			if v1helper.IsHugePageResourceName(k) {
   382  				allocatableMemory := node.Status.Allocatable[v1.ResourceMemory]
   383  				value := v.DeepCopy()
   384  				allocatableMemory.Sub(value)
   385  				if allocatableMemory.Sign() < 0 {
   386  					// Negative Allocatable resources don't make sense.
   387  					allocatableMemory.Set(0)
   388  				}
   389  				node.Status.Allocatable[v1.ResourceMemory] = allocatableMemory
   390  			}
   391  		}
   392  		return nil
   393  	}
   394  }
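
// exampleAllocatableCPU is an illustrative sketch (hypothetical helper and
// values): the Allocatable loop above is essentially capacity minus the node
// allocatable reservation, floored at zero. With 4 CPUs of capacity and a
// 500m reservation this yields 3500m.
func exampleAllocatableCPU() resource.Quantity {
	capacity := resource.MustParse("4")
	reservation := resource.MustParse("500m")
	allocatable := capacity.DeepCopy()
	allocatable.Sub(reservation)
	if allocatable.Sign() < 0 {
		// Negative allocatable resources don't make sense; clamp to zero.
		allocatable.Set(0)
	}
	return allocatable // 3500m
}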
   395  
   396  // VersionInfo returns a Setter that updates version-related information on the node.
   397  func VersionInfo(versionInfoFunc func() (*cadvisorapiv1.VersionInfo, error), // typically Kubelet.cadvisor.VersionInfo
   398  	runtimeTypeFunc func() string, // typically Kubelet.containerRuntime.Type
   399  	runtimeVersionFunc func(ctx context.Context) (kubecontainer.Version, error), // typically Kubelet.containerRuntime.Version
   400  ) Setter {
   401  	return func(ctx context.Context, node *v1.Node) error {
   402  		verinfo, err := versionInfoFunc()
   403  		if err != nil {
   404  			return fmt.Errorf("error getting version info: %v", err)
   405  		}
   406  
   407  		node.Status.NodeInfo.KernelVersion = verinfo.KernelVersion
   408  		node.Status.NodeInfo.OSImage = verinfo.ContainerOsVersion
   409  
   410  		runtimeVersion := "Unknown"
   411  		if runtimeVer, err := runtimeVersionFunc(ctx); err == nil {
   412  			runtimeVersion = runtimeVer.String()
   413  		}
   414  		node.Status.NodeInfo.ContainerRuntimeVersion = fmt.Sprintf("%s://%s", runtimeTypeFunc(), runtimeVersion)
   415  
   416  		node.Status.NodeInfo.KubeletVersion = version.Get().String()
   417  
   418  		if utilfeature.DefaultFeatureGate.Enabled(features.DisableNodeKubeProxyVersion) {
   419  			// This field is deprecated and should be cleared if it was previously set.
   420  			node.Status.NodeInfo.KubeProxyVersion = ""
   421  		} else {
   422  			node.Status.NodeInfo.KubeProxyVersion = version.Get().String()
   423  		}
   424  
   425  		return nil
   426  	}
   427  }
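
// exampleVersionInfoSetter is an illustrative sketch (hypothetical helper and
// stubbed sources): ContainerRuntimeVersion is rendered as "<type>://<version>";
// here the stubbed runtime returns an error, so the Setter falls back to
// "containerd://Unknown".
func exampleVersionInfoSetter(ctx context.Context, node *v1.Node) error {
	setter := VersionInfo(
		func() (*cadvisorapiv1.VersionInfo, error) {
			// Hypothetical values standing in for what cAdvisor would report.
			return &cadvisorapiv1.VersionInfo{KernelVersion: "6.1.0", ContainerOsVersion: "Example Linux 1.0"}, nil
		},
		func() string { return "containerd" },
		func(ctx context.Context) (kubecontainer.Version, error) {
			return nil, fmt.Errorf("runtime unavailable") // forces the "Unknown" fallback above
		},
	)
	return setter(ctx, node)
}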
   428  
   429  // DaemonEndpoints returns a Setter that updates the daemon endpoints on the node.
   430  func DaemonEndpoints(daemonEndpoints *v1.NodeDaemonEndpoints) Setter {
   431  	return func(ctx context.Context, node *v1.Node) error {
   432  		node.Status.DaemonEndpoints = *daemonEndpoints
   433  		return nil
   434  	}
   435  }
   436  
   437  // Images returns a Setter that updates the images on the node.
   438  // imageListFunc is expected to return a list of images sorted in descending order by image size.
   439  // nodeStatusMaxImages is ignored if set to -1.
   440  func Images(nodeStatusMaxImages int32,
   441  	imageListFunc func() ([]kubecontainer.Image, error), // typically Kubelet.imageManager.GetImageList
   442  ) Setter {
   443  	return func(ctx context.Context, node *v1.Node) error {
   444  		// Update image list of this node
   445  		var imagesOnNode []v1.ContainerImage
   446  		containerImages, err := imageListFunc()
   447  		if err != nil {
   448  			node.Status.Images = imagesOnNode
   449  			return fmt.Errorf("error getting image list: %v", err)
   450  		}
   451  		// we expect imageListFunc to return a sorted list, so we just need to truncate
   452  		if int(nodeStatusMaxImages) > -1 &&
   453  			int(nodeStatusMaxImages) < len(containerImages) {
   454  			containerImages = containerImages[0:nodeStatusMaxImages]
   455  		}
   456  
   457  		for _, image := range containerImages {
   458  			// make a copy to avoid modifying the slice fields of the image items in the list
   459  			names := append([]string{}, image.RepoDigests...)
   460  			names = append(names, image.RepoTags...)
   461  			// Report up to MaxNamesPerImageInNodeStatus names per image.
   462  			if len(names) > MaxNamesPerImageInNodeStatus {
   463  				names = names[0:MaxNamesPerImageInNodeStatus]
   464  			}
   465  			imagesOnNode = append(imagesOnNode, v1.ContainerImage{
   466  				Names:     names,
   467  				SizeBytes: image.Size,
   468  			})
   469  		}
   470  
   471  		node.Status.Images = imagesOnNode
   472  		return nil
   473  	}
   474  }
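
// exampleImagesSetter is an illustrative sketch (hypothetical helper, image
// names, and sizes): Images truncates the (already size-sorted) image list to
// nodeStatusMaxImages and each image's name list to MaxNamesPerImageInNodeStatus.
func exampleImagesSetter(ctx context.Context, node *v1.Node) error {
	setter := Images(1, func() ([]kubecontainer.Image, error) {
		return []kubecontainer.Image{
			{RepoTags: []string{"registry.example/app:v1"}, Size: 200 * 1024 * 1024},
			{RepoTags: []string{"registry.example/base:v1"}, Size: 50 * 1024 * 1024},
		}, nil
	})
	// Only the first (largest) image is reported because nodeStatusMaxImages is 1.
	return setter(ctx, node)
}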
   475  
   476  // GoRuntime returns a Setter that sets GOOS and GOARCH on the node.
   477  func GoRuntime() Setter {
   478  	return func(ctx context.Context, node *v1.Node) error {
   479  		node.Status.NodeInfo.OperatingSystem = goruntime.GOOS
   480  		node.Status.NodeInfo.Architecture = goruntime.GOARCH
   481  		return nil
   482  	}
   483  }
   484  
   485  // RuntimeHandlers returns a Setter that sets RuntimeHandlers on the node.
   486  func RuntimeHandlers(fn func() []kubecontainer.RuntimeHandler) Setter {
   487  	return func(ctx context.Context, node *v1.Node) error {
   488  		if !utilfeature.DefaultFeatureGate.Enabled(features.RecursiveReadOnlyMounts) {
   489  			return nil
   490  		}
   491  		handlers := fn()
   492  		node.Status.RuntimeHandlers = make([]v1.NodeRuntimeHandler, len(handlers))
   493  		for i, h := range handlers {
   494  			node.Status.RuntimeHandlers[i] = v1.NodeRuntimeHandler{
   495  				Name: h.Name,
   496  				Features: &v1.NodeRuntimeHandlerFeatures{
   497  					RecursiveReadOnlyMounts: &h.SupportsRecursiveReadOnlyMounts,
   498  				},
   499  			}
   500  		}
   501  		return nil
   502  	}
   503  }
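
// exampleRuntimeHandlersSetter is an illustrative sketch (hypothetical helper
// and handler name): when the RecursiveReadOnlyMounts feature gate is enabled,
// each handler reported by the runtime is published with its recursive
// read-only mount support; when the gate is disabled the Setter is a no-op.
func exampleRuntimeHandlersSetter(ctx context.Context, node *v1.Node) error {
	setter := RuntimeHandlers(func() []kubecontainer.RuntimeHandler {
		return []kubecontainer.RuntimeHandler{
			{Name: "runc", SupportsRecursiveReadOnlyMounts: true},
		}
	})
	return setter(ctx, node)
}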
   504  
   505  // ReadyCondition returns a Setter that updates the v1.NodeReady condition on the node.
   506  func ReadyCondition(
   507  	nowFunc func() time.Time, // typically Kubelet.clock.Now
   508  	runtimeErrorsFunc func() error, // typically Kubelet.runtimeState.runtimeErrors
   509  	networkErrorsFunc func() error, // typically Kubelet.runtimeState.networkErrors
   510  	storageErrorsFunc func() error, // typically Kubelet.runtimeState.storageErrors
   511  	cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status
   512  	nodeShutdownManagerErrorsFunc func() error, // typically Kubelet.shutdownManager.errors
   513  	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
   514  	localStorageCapacityIsolation bool,
   515  ) Setter {
   516  	return func(ctx context.Context, node *v1.Node) error {
   517  		// NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions.
   518  		// This is due to an issue with version skewed kubelet and master components.
   519  		// ref: https://github.com/kubernetes/kubernetes/issues/16961
   520  		currentTime := metav1.NewTime(nowFunc())
   521  		newNodeReadyCondition := v1.NodeCondition{
   522  			Type:              v1.NodeReady,
   523  			Status:            v1.ConditionTrue,
   524  			Reason:            "KubeletReady",
   525  			Message:           "kubelet is posting ready status",
   526  			LastHeartbeatTime: currentTime,
   527  		}
   528  		errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc(), nodeShutdownManagerErrorsFunc()}
   529  		requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
   530  		if localStorageCapacityIsolation {
   531  			requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage)
   532  		}
   533  		missingCapacities := []string{}
   534  		for _, resource := range requiredCapacities {
   535  			if _, found := node.Status.Capacity[resource]; !found {
   536  				missingCapacities = append(missingCapacities, string(resource))
   537  			}
   538  		}
   539  		if len(missingCapacities) > 0 {
   540  			errs = append(errs, fmt.Errorf("missing node capacity for resources: %s", strings.Join(missingCapacities, ", ")))
   541  		}
   542  		if aggregatedErr := errors.NewAggregate(errs); aggregatedErr != nil {
   543  			newNodeReadyCondition = v1.NodeCondition{
   544  				Type:              v1.NodeReady,
   545  				Status:            v1.ConditionFalse,
   546  				Reason:            "KubeletNotReady",
   547  				Message:           aggregatedErr.Error(),
   548  				LastHeartbeatTime: currentTime,
   549  			}
   550  		}
   551  
   552  		// Record any soft requirements that were not met in the container manager.
   553  		status := cmStatusFunc()
   554  		if status.SoftRequirements != nil {
   555  			newNodeReadyCondition.Message = fmt.Sprintf("%s. WARNING: %s", newNodeReadyCondition.Message, status.SoftRequirements.Error())
   556  		}
   557  
   558  		readyConditionUpdated := false
   559  		needToRecordEvent := false
   560  		for i := range node.Status.Conditions {
   561  			if node.Status.Conditions[i].Type == v1.NodeReady {
   562  				if node.Status.Conditions[i].Status == newNodeReadyCondition.Status {
   563  					newNodeReadyCondition.LastTransitionTime = node.Status.Conditions[i].LastTransitionTime
   564  				} else {
   565  					newNodeReadyCondition.LastTransitionTime = currentTime
   566  					needToRecordEvent = true
   567  				}
   568  				node.Status.Conditions[i] = newNodeReadyCondition
   569  				readyConditionUpdated = true
   570  				break
   571  			}
   572  		}
   573  		if !readyConditionUpdated {
   574  			newNodeReadyCondition.LastTransitionTime = currentTime
   575  			node.Status.Conditions = append(node.Status.Conditions, newNodeReadyCondition)
   576  		}
   577  		if needToRecordEvent {
   578  			if newNodeReadyCondition.Status == v1.ConditionTrue {
   579  				recordEventFunc(v1.EventTypeNormal, events.NodeReady)
   580  			} else {
   581  				recordEventFunc(v1.EventTypeNormal, events.NodeNotReady)
   582  				klog.InfoS("Node became not ready", "node", klog.KObj(node), "condition", newNodeReadyCondition)
   583  			}
   584  		}
   585  		return nil
   586  	}
   587  }
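
// exampleNotReadyCondition is an illustrative sketch (hypothetical helper and
// stubbed error sources): with a stubbed network error, the Setter returned by
// ReadyCondition reports NodeReady=False with reason "KubeletNotReady" and the
// aggregated error text (plus any missing capacities) as the message.
func exampleNotReadyCondition(ctx context.Context, node *v1.Node) error {
	setter := ReadyCondition(
		time.Now,
		func() error { return nil },                         // no runtime errors
		func() error { return fmt.Errorf("CNI not ready") }, // stubbed network error
		func() error { return nil },                         // no storage errors
		func() cm.Status { return cm.Status{} },             // no unmet soft requirements
		func() error { return nil },                         // no shutdown errors
		func(eventType, event string) {},                    // discard events for the example
		false,                                               // localStorageCapacityIsolation disabled
	)
	return setter(ctx, node)
}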
   588  
   589  // MemoryPressureCondition returns a Setter that updates the v1.NodeMemoryPressure condition on the node.
   590  func MemoryPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
   591  	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderMemoryPressure
   592  	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
   593  ) Setter {
   594  	return func(ctx context.Context, node *v1.Node) error {
   595  		currentTime := metav1.NewTime(nowFunc())
   596  		var condition *v1.NodeCondition
   597  
   598  		// Check if NodeMemoryPressure condition already exists and if it does, just pick it up for update.
   599  		for i := range node.Status.Conditions {
   600  			if node.Status.Conditions[i].Type == v1.NodeMemoryPressure {
   601  				condition = &node.Status.Conditions[i]
   602  			}
   603  		}
   604  
   605  		newCondition := false
   606  		// If the NodeMemoryPressure condition doesn't exist, create one
   607  		if condition == nil {
   608  			condition = &v1.NodeCondition{
   609  				Type:   v1.NodeMemoryPressure,
   610  				Status: v1.ConditionUnknown,
   611  			}
   612  			// The condition cannot be appended to node.Status.Conditions here because
   613  			// it would be copied into the slice, so any updates we make to it below
   614  			// would not be reflected in the stored copy.
   615  			newCondition = true
   616  		}
   617  
   618  		// Update the heartbeat time
   619  		condition.LastHeartbeatTime = currentTime
   620  
   621  		// Note: The conditions below handle both the case where a new NodeMemoryPressure condition is
   622  		// created and the case where the condition already exists. When a new condition
   623  		// is created its status is set to v1.ConditionUnknown which matches either
   624  		// condition.Status != v1.ConditionTrue or
   625  		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
   626  		// the kubelet is under memory pressure or not.
   627  		if pressureFunc() {
   628  			if condition.Status != v1.ConditionTrue {
   629  				condition.Status = v1.ConditionTrue
   630  				condition.Reason = "KubeletHasInsufficientMemory"
   631  				condition.Message = "kubelet has insufficient memory available"
   632  				condition.LastTransitionTime = currentTime
   633  				recordEventFunc(v1.EventTypeNormal, "NodeHasInsufficientMemory")
   634  			}
   635  		} else if condition.Status != v1.ConditionFalse {
   636  			condition.Status = v1.ConditionFalse
   637  			condition.Reason = "KubeletHasSufficientMemory"
   638  			condition.Message = "kubelet has sufficient memory available"
   639  			condition.LastTransitionTime = currentTime
   640  			recordEventFunc(v1.EventTypeNormal, "NodeHasSufficientMemory")
   641  		}
   642  
   643  		if newCondition {
   644  			node.Status.Conditions = append(node.Status.Conditions, *condition)
   645  		}
   646  		return nil
   647  	}
   648  }
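
// exampleMemoryPressureSetter is an illustrative sketch (hypothetical helper
// and stubbed pressure check): on the first call the NodeMemoryPressure
// condition is created and LastTransitionTime is set; later calls only refresh
// LastHeartbeatTime until the pressure state flips. PIDPressureCondition and
// DiskPressureCondition below follow the same pattern.
func exampleMemoryPressureSetter(ctx context.Context, node *v1.Node) error {
	setter := MemoryPressureCondition(
		time.Now,
		func() bool { return false },     // stub: not under memory pressure
		func(eventType, event string) {}, // discard events for the example
	)
	return setter(ctx, node)
}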
   649  
   650  // PIDPressureCondition returns a Setter that updates the v1.NodePIDPressure condition on the node.
   651  func PIDPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
   652  	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderPIDPressure
   653  	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
   654  ) Setter {
   655  	return func(ctx context.Context, node *v1.Node) error {
   656  		currentTime := metav1.NewTime(nowFunc())
   657  		var condition *v1.NodeCondition
   658  
   659  		// Check if NodePIDPressure condition already exists and if it does, just pick it up for update.
   660  		for i := range node.Status.Conditions {
   661  			if node.Status.Conditions[i].Type == v1.NodePIDPressure {
   662  				condition = &node.Status.Conditions[i]
   663  			}
   664  		}
   665  
   666  		newCondition := false
   667  		// If the NodePIDPressure condition doesn't exist, create one
   668  		if condition == nil {
   669  			condition = &v1.NodeCondition{
   670  				Type:   v1.NodePIDPressure,
   671  				Status: v1.ConditionUnknown,
   672  			}
   673  			// The condition cannot be appended to node.Status.Conditions here because
   674  			// it would be copied into the slice, so any updates we make to it below
   675  			// would not be reflected in the stored copy.
   676  			newCondition = true
   677  		}
   678  
   679  		// Update the heartbeat time
   680  		condition.LastHeartbeatTime = currentTime
   681  
   682  		// Note: The conditions below handle both the case where a new NodePIDPressure condition is
   683  		// created and the case where the condition already exists. When a new condition
   684  		// is created its status is set to v1.ConditionUnknown which matches either
   685  		// condition.Status != v1.ConditionTrue or
   686  		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
   687  		// the kubelet is under PID pressure or not.
   688  		if pressureFunc() {
   689  			if condition.Status != v1.ConditionTrue {
   690  				condition.Status = v1.ConditionTrue
   691  				condition.Reason = "KubeletHasInsufficientPID"
   692  				condition.Message = "kubelet has insufficient PID available"
   693  				condition.LastTransitionTime = currentTime
   694  				recordEventFunc(v1.EventTypeNormal, "NodeHasInsufficientPID")
   695  			}
   696  		} else if condition.Status != v1.ConditionFalse {
   697  			condition.Status = v1.ConditionFalse
   698  			condition.Reason = "KubeletHasSufficientPID"
   699  			condition.Message = "kubelet has sufficient PID available"
   700  			condition.LastTransitionTime = currentTime
   701  			recordEventFunc(v1.EventTypeNormal, "NodeHasSufficientPID")
   702  		}
   703  
   704  		if newCondition {
   705  			node.Status.Conditions = append(node.Status.Conditions, *condition)
   706  		}
   707  		return nil
   708  	}
   709  }
   710  
   711  // DiskPressureCondition returns a Setter that updates the v1.NodeDiskPressure condition on the node.
   712  func DiskPressureCondition(nowFunc func() time.Time, // typically Kubelet.clock.Now
   713  	pressureFunc func() bool, // typically Kubelet.evictionManager.IsUnderDiskPressure
   714  	recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent
   715  ) Setter {
   716  	return func(ctx context.Context, node *v1.Node) error {
   717  		currentTime := metav1.NewTime(nowFunc())
   718  		var condition *v1.NodeCondition
   719  
   720  		// Check if NodeDiskPressure condition already exists and if it does, just pick it up for update.
   721  		for i := range node.Status.Conditions {
   722  			if node.Status.Conditions[i].Type == v1.NodeDiskPressure {
   723  				condition = &node.Status.Conditions[i]
   724  			}
   725  		}
   726  
   727  		newCondition := false
   728  		// If the NodeDiskPressure condition doesn't exist, create one
   729  		if condition == nil {
   730  			condition = &v1.NodeCondition{
   731  				Type:   v1.NodeDiskPressure,
   732  				Status: v1.ConditionUnknown,
   733  			}
   734  			// The condition cannot be appended to node.Status.Conditions here because
   735  			// it would be copied into the slice, so any updates we make to it below
   736  			// would not be reflected in the stored copy.
   737  			newCondition = true
   738  		}
   739  
   740  		// Update the heartbeat time
   741  		condition.LastHeartbeatTime = currentTime
   742  
   743  		// Note: The conditions below handle both the case where a new NodeDiskPressure condition is
   744  		// created and the case where the condition already exists. When a new condition
   745  		// is created its status is set to v1.ConditionUnknown which matches either
   746  		// condition.Status != v1.ConditionTrue or
   747  		// condition.Status != v1.ConditionFalse in the conditions below depending on whether
   748  		// the kubelet is under disk pressure or not.
   749  		if pressureFunc() {
   750  			if condition.Status != v1.ConditionTrue {
   751  				condition.Status = v1.ConditionTrue
   752  				condition.Reason = "KubeletHasDiskPressure"
   753  				condition.Message = "kubelet has disk pressure"
   754  				condition.LastTransitionTime = currentTime
   755  				recordEventFunc(v1.EventTypeNormal, "NodeHasDiskPressure")
   756  			}
   757  		} else if condition.Status != v1.ConditionFalse {
   758  			condition.Status = v1.ConditionFalse
   759  			condition.Reason = "KubeletHasNoDiskPressure"
   760  			condition.Message = "kubelet has no disk pressure"
   761  			condition.LastTransitionTime = currentTime
   762  			recordEventFunc(v1.EventTypeNormal, "NodeHasNoDiskPressure")
   763  		}
   764  
   765  		if newCondition {
   766  			node.Status.Conditions = append(node.Status.Conditions, *condition)
   767  		}
   768  		return nil
   769  	}
   770  }
   771  
   772  // VolumesInUse returns a Setter that updates the volumes in use on the node.
   773  func VolumesInUse(syncedFunc func() bool, // typically Kubelet.volumeManager.ReconcilerStatesHasBeenSynced
   774  	volumesInUseFunc func() []v1.UniqueVolumeName, // typically Kubelet.volumeManager.GetVolumesInUse
   775  ) Setter {
   776  	return func(ctx context.Context, node *v1.Node) error {
   777  		// Make sure to update node status only after the reconciler has started syncing states
   778  		if syncedFunc() {
   779  			node.Status.VolumesInUse = volumesInUseFunc()
   780  		}
   781  		return nil
   782  	}
   783  }