k8s.io/kubernetes@v1.29.3/pkg/kubelet/kubelet_node_status.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package kubelet

import (
	"context"
	"fmt"
	"net"
	goruntime "runtime"
	"sort"
	"strings"
	"time"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	cloudprovider "k8s.io/cloud-provider"
	cloudproviderapi "k8s.io/cloud-provider/api"
	nodeutil "k8s.io/component-helpers/node/util"
	"k8s.io/klog/v2"
	kubeletapis "k8s.io/kubelet/pkg/apis"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/kubelet/nodestatus"
	"k8s.io/kubernetes/pkg/kubelet/util"
	taintutil "k8s.io/kubernetes/pkg/util/taints"
	volutil "k8s.io/kubernetes/pkg/volume/util"
)

// registerWithAPIServer registers the node with the cluster master. It is safe
// to call multiple times, but not concurrently (kl.registrationCompleted is
// not locked).
func (kl *Kubelet) registerWithAPIServer() {
	if kl.registrationCompleted {
		return
	}

	kl.nodeStartupLatencyTracker.RecordAttemptRegisterNode()

	step := 100 * time.Millisecond

	for {
		time.Sleep(step)
		step = step * 2
		if step >= 7*time.Second {
			step = 7 * time.Second
		}

		node, err := kl.initialNode(context.TODO())
		if err != nil {
			klog.ErrorS(err, "Unable to construct v1.Node object for kubelet")
			continue
		}

		klog.InfoS("Attempting to register node", "node", klog.KObj(node))
		registered := kl.tryRegisterWithAPIServer(node)
		if registered {
			klog.InfoS("Successfully registered node", "node", klog.KObj(node))
			kl.registrationCompleted = true
			return
		}
	}
}

// tryRegisterWithAPIServer makes an attempt to register the given node with
// the API server, returning a boolean indicating whether the attempt was
// successful. If a node with the same name already exists, it reconciles the
// value of the annotation for controller-managed attach-detach of attachable
// persistent volumes for the node.
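// It also reconciles the node's default labels, extended resources, and huge
// page resources with the existing Node object, and patches the existing node
// via PatchNodeStatus when any of these reconciliations require an update.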
func (kl *Kubelet) tryRegisterWithAPIServer(node *v1.Node) bool {
	_, err := kl.kubeClient.CoreV1().Nodes().Create(context.TODO(), node, metav1.CreateOptions{})
	if err == nil {
		kl.nodeStartupLatencyTracker.RecordRegisteredNewNode()
		return true
	}

	if !apierrors.IsAlreadyExists(err) {
		klog.ErrorS(err, "Unable to register node with API server", "node", klog.KObj(node))
		return false
	}

	existingNode, err := kl.kubeClient.CoreV1().Nodes().Get(context.TODO(), string(kl.nodeName), metav1.GetOptions{})
	if err != nil {
		klog.ErrorS(err, "Unable to register node with API server, error getting existing node", "node", klog.KObj(node))
		return false
	}
	if existingNode == nil {
		klog.InfoS("Unable to register node with API server, no node instance returned", "node", klog.KObj(node))
		return false
	}

	originalNode := existingNode.DeepCopy()

	klog.InfoS("Node was previously registered", "node", klog.KObj(node))

	// Edge case: the node was previously registered; reconcile
	// the value of the controller-managed attach-detach
	// annotation.
	requiresUpdate := kl.reconcileCMADAnnotationWithExistingNode(node, existingNode)
	requiresUpdate = kl.updateDefaultLabels(node, existingNode) || requiresUpdate
	requiresUpdate = kl.reconcileExtendedResource(node, existingNode) || requiresUpdate
	requiresUpdate = kl.reconcileHugePageResource(node, existingNode) || requiresUpdate
	if requiresUpdate {
		if _, _, err := nodeutil.PatchNodeStatus(kl.kubeClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, existingNode); err != nil {
			klog.ErrorS(err, "Unable to reconcile node with API server, error updating node", "node", klog.KObj(node))
			return false
		}
	}

	return true
}

// reconcileHugePageResource will update huge page capacity for each page size and remove huge page sizes no longer supported
func (kl *Kubelet) reconcileHugePageResource(initialNode, existingNode *v1.Node) bool {
	requiresUpdate := updateDefaultResources(initialNode, existingNode)
	supportedHugePageResources := sets.String{}

	for resourceName := range initialNode.Status.Capacity {
		if !v1helper.IsHugePageResourceName(resourceName) {
			continue
		}
		supportedHugePageResources.Insert(string(resourceName))

		initialCapacity := initialNode.Status.Capacity[resourceName]
		initialAllocatable := initialNode.Status.Allocatable[resourceName]

		capacity, resourceIsSupported := existingNode.Status.Capacity[resourceName]
		allocatable := existingNode.Status.Allocatable[resourceName]

		// Add or update capacity if the size was previously unsupported or has changed
		if !resourceIsSupported || capacity.Cmp(initialCapacity) != 0 {
			existingNode.Status.Capacity[resourceName] = initialCapacity.DeepCopy()
			requiresUpdate = true
		}

		// Add or update allocatable if the size was previously unsupported or has changed
		if !resourceIsSupported || allocatable.Cmp(initialAllocatable) != 0 {
			existingNode.Status.Allocatable[resourceName] = initialAllocatable.DeepCopy()
			requiresUpdate = true
		}

	}

	for resourceName := range existingNode.Status.Capacity {
		if !v1helper.IsHugePageResourceName(resourceName) {
			continue
		}

		// If the huge page size is no longer supported, remove it from the node
		if !supportedHugePageResources.Has(string(resourceName)) {
			delete(existingNode.Status.Capacity, resourceName)
			delete(existingNode.Status.Allocatable, resourceName)
			klog.InfoS("Removing huge page resource which is no longer supported", "resourceName", resourceName)
			requiresUpdate = true
		}
	}
	return requiresUpdate
}

// Zeros out extended resource capacity during reconciliation.
func (kl *Kubelet) reconcileExtendedResource(initialNode, node *v1.Node) bool {
	requiresUpdate := updateDefaultResources(initialNode, node)
	// Check with the device manager to see if node has been recreated, in which case extended resources should be zeroed until they are available
	if kl.containerManager.ShouldResetExtendedResourceCapacity() {
		for k := range node.Status.Capacity {
			if v1helper.IsExtendedResourceName(k) {
				klog.InfoS("Zero out resource capacity in existing node", "resourceName", k, "node", klog.KObj(node))
				node.Status.Capacity[k] = *resource.NewQuantity(int64(0), resource.DecimalSI)
				node.Status.Allocatable[k] = *resource.NewQuantity(int64(0), resource.DecimalSI)
				requiresUpdate = true
			}
		}
	}
	return requiresUpdate
}

// updateDefaultResources will set the default resources on the existing node according to the initial node
func updateDefaultResources(initialNode, existingNode *v1.Node) bool {
	requiresUpdate := false
	if existingNode.Status.Capacity == nil {
		if initialNode.Status.Capacity != nil {
			existingNode.Status.Capacity = initialNode.Status.Capacity.DeepCopy()
			requiresUpdate = true
		} else {
			existingNode.Status.Capacity = make(map[v1.ResourceName]resource.Quantity)
		}
	}

	if existingNode.Status.Allocatable == nil {
		if initialNode.Status.Allocatable != nil {
			existingNode.Status.Allocatable = initialNode.Status.Allocatable.DeepCopy()
			requiresUpdate = true
		} else {
			existingNode.Status.Allocatable = make(map[v1.ResourceName]resource.Quantity)
		}
	}
	return requiresUpdate
}

// updateDefaultLabels will set the default labels on the node
func (kl *Kubelet) updateDefaultLabels(initialNode, existingNode *v1.Node) bool {
	defaultLabels := []string{
		v1.LabelHostname,
		v1.LabelTopologyZone,
		v1.LabelTopologyRegion,
		v1.LabelFailureDomainBetaZone,
		v1.LabelFailureDomainBetaRegion,
		v1.LabelInstanceTypeStable,
		v1.LabelInstanceType,
		v1.LabelOSStable,
		v1.LabelArchStable,
		v1.LabelWindowsBuild,
		kubeletapis.LabelOS,
		kubeletapis.LabelArch,
	}

	needsUpdate := false
	if existingNode.Labels == nil {
		existingNode.Labels = make(map[string]string)
	}
	// Set default labels but make sure to not set labels with empty values
	for _, label := range defaultLabels {
		if _, hasInitialValue := initialNode.Labels[label]; !hasInitialValue {
			continue
		}

		if existingNode.Labels[label] != initialNode.Labels[label] {
			existingNode.Labels[label] = initialNode.Labels[label]
			needsUpdate = true
		}

		if existingNode.Labels[label] == "" {
			delete(existingNode.Labels, label)
		}
	}

	return needsUpdate
}

// reconcileCMADAnnotationWithExistingNode reconciles the controller-managed
// attach-detach annotation on a new node and the existing node, returning
// whether the existing node must be updated.
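// If the annotation is unset on the newly constructed node, it is removed from
// the existing node; otherwise the existing node's value is overwritten with
// the new one. For example, an existing node annotated with
// "volumes.kubernetes.io/controller-managed-attach-detach": "true" is left
// untouched only when the new node carries the same value.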
func (kl *Kubelet) reconcileCMADAnnotationWithExistingNode(node, existingNode *v1.Node) bool {
	var (
		existingCMAAnnotation    = existingNode.Annotations[volutil.ControllerManagedAttachAnnotation]
		newCMAAnnotation, newSet = node.Annotations[volutil.ControllerManagedAttachAnnotation]
	)

	if newCMAAnnotation == existingCMAAnnotation {
		return false
	}

	// If the just-constructed node and the existing node do
	// not have the same value, update the existing node with
	// the correct value of the annotation.
	if !newSet {
		klog.InfoS("Controller attach-detach setting changed to false; updating existing Node")
		delete(existingNode.Annotations, volutil.ControllerManagedAttachAnnotation)
	} else {
		klog.InfoS("Controller attach-detach setting changed to true; updating existing Node")
		if existingNode.Annotations == nil {
			existingNode.Annotations = make(map[string]string)
		}
		existingNode.Annotations[volutil.ControllerManagedAttachAnnotation] = newCMAAnnotation
	}

	return true
}

// initialNode constructs the initial v1.Node for this Kubelet, incorporating node
// labels, information from the cloud provider, and Kubelet configuration.
func (kl *Kubelet) initialNode(ctx context.Context) (*v1.Node, error) {
	node := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: string(kl.nodeName),
			Labels: map[string]string{
				v1.LabelHostname:      kl.hostname,
				v1.LabelOSStable:      goruntime.GOOS,
				v1.LabelArchStable:    goruntime.GOARCH,
				kubeletapis.LabelOS:   goruntime.GOOS,
				kubeletapis.LabelArch: goruntime.GOARCH,
			},
		},
		Spec: v1.NodeSpec{
			Unschedulable: !kl.registerSchedulable,
		},
	}
	osLabels, err := getOSSpecificLabels()
	if err != nil {
		return nil, err
	}
	for label, value := range osLabels {
		node.Labels[label] = value
	}

	nodeTaints := make([]v1.Taint, len(kl.registerWithTaints))
	copy(nodeTaints, kl.registerWithTaints)
	unschedulableTaint := v1.Taint{
		Key:    v1.TaintNodeUnschedulable,
		Effect: v1.TaintEffectNoSchedule,
	}

	// Taint node with TaintNodeUnschedulable when initializing
	// node to avoid race condition; refer to #63897 for more detail.
	if node.Spec.Unschedulable &&
		!taintutil.TaintExists(nodeTaints, &unschedulableTaint) {
		nodeTaints = append(nodeTaints, unschedulableTaint)
	}

	if kl.externalCloudProvider {
		taint := v1.Taint{
			Key:    cloudproviderapi.TaintExternalCloudProvider,
			Value:  "true",
			Effect: v1.TaintEffectNoSchedule,
		}

		nodeTaints = append(nodeTaints, taint)
	}
	if len(nodeTaints) > 0 {
		node.Spec.Taints = nodeTaints
	}
	// Initially, set NodeNetworkUnavailable to true.
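	// The condition is expected to be cleared later, typically by the cloud
	// provider's route controller, once networking for the node has been set up.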
	if kl.providerRequiresNetworkingConfiguration() {
		node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
			Type:               v1.NodeNetworkUnavailable,
			Status:             v1.ConditionTrue,
			Reason:             "NoRouteCreated",
			Message:            "Node created without a route",
			LastTransitionTime: metav1.NewTime(kl.clock.Now()),
		})
	}

	if kl.enableControllerAttachDetach {
		if node.Annotations == nil {
			node.Annotations = make(map[string]string)
		}

		klog.V(2).InfoS("Setting node annotation to enable volume controller attach/detach")
		node.Annotations[volutil.ControllerManagedAttachAnnotation] = "true"
	} else {
		klog.V(2).InfoS("Controller attach/detach is disabled for this node; Kubelet will attach and detach volumes")
	}

	if kl.keepTerminatedPodVolumes {
		if node.Annotations == nil {
			node.Annotations = make(map[string]string)
		}
		klog.V(2).InfoS("Setting node annotation to keep pod volumes of terminated pods attached to the node")
		node.Annotations[volutil.KeepTerminatedPodVolumesAnnotation] = "true"
	}

	// @question: should this be placed after the call to the cloud provider, which also applies labels?
	for k, v := range kl.nodeLabels {
		if cv, found := node.ObjectMeta.Labels[k]; found {
			klog.InfoS("the node label will overwrite default setting", "labelKey", k, "labelValue", v, "default", cv)
		}
		node.ObjectMeta.Labels[k] = v
	}

	if kl.providerID != "" {
		node.Spec.ProviderID = kl.providerID
	}

	if kl.cloud != nil {
		instances, ok := kl.cloud.Instances()
		if !ok {
			return nil, fmt.Errorf("failed to get instances from cloud provider")
		}

		// TODO: We can't assume that the node has credentials to talk to the
		// cloudprovider from arbitrary nodes. At most, we should talk to a
		// local metadata server here.
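		// Query the cloud provider for the provider ID only when it was not
		// already supplied through kubelet configuration above.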
		var err error
		if node.Spec.ProviderID == "" {
			node.Spec.ProviderID, err = cloudprovider.GetInstanceProviderID(ctx, kl.cloud, kl.nodeName)
			if err != nil {
				return nil, err
			}
		}

		instanceType, err := instances.InstanceType(ctx, kl.nodeName)
		if err != nil {
			return nil, err
		}
		if instanceType != "" {
			klog.InfoS("Adding label from cloud provider", "labelKey", v1.LabelInstanceType, "labelValue", instanceType)
			node.ObjectMeta.Labels[v1.LabelInstanceType] = instanceType
			klog.InfoS("Adding node label from cloud provider", "labelKey", v1.LabelInstanceTypeStable, "labelValue", instanceType)
			node.ObjectMeta.Labels[v1.LabelInstanceTypeStable] = instanceType
		}
		// If the cloud has zone information, label the node with the zone information
		zones, ok := kl.cloud.Zones()
		if ok {
			zone, err := zones.GetZone(ctx)
			if err != nil {
				return nil, fmt.Errorf("failed to get zone from cloud provider: %v", err)
			}
			if zone.FailureDomain != "" {
				klog.InfoS("Adding node label from cloud provider", "labelKey", v1.LabelFailureDomainBetaZone, "labelValue", zone.FailureDomain)
				node.ObjectMeta.Labels[v1.LabelFailureDomainBetaZone] = zone.FailureDomain
				klog.InfoS("Adding node label from cloud provider", "labelKey", v1.LabelTopologyZone, "labelValue", zone.FailureDomain)
				node.ObjectMeta.Labels[v1.LabelTopologyZone] = zone.FailureDomain
			}
			if zone.Region != "" {
				klog.InfoS("Adding node label from cloud provider", "labelKey", v1.LabelFailureDomainBetaRegion, "labelValue", zone.Region)
				node.ObjectMeta.Labels[v1.LabelFailureDomainBetaRegion] = zone.Region
				klog.InfoS("Adding node label from cloud provider", "labelKey", v1.LabelTopologyRegion, "labelValue", zone.Region)
				node.ObjectMeta.Labels[v1.LabelTopologyRegion] = zone.Region
			}
		}
	}

	kl.setNodeStatus(ctx, node)

	return node, nil
}

// fastNodeStatusUpdate is a "lightweight" version of syncNodeStatus which doesn't hit the
// apiserver except for the final run, to be called by fastStatusUpdateOnce in each loop.
// It holds the same lock as syncNodeStatus and is thread-safe when called concurrently with
// syncNodeStatus. Its return value indicates whether the loop running it should exit
// (final run), and it also sets kl.containerRuntimeReadyExpected.
func (kl *Kubelet) fastNodeStatusUpdate(ctx context.Context, timeout bool) (completed bool) {
	kl.syncNodeStatusMux.Lock()
	defer func() {
		kl.syncNodeStatusMux.Unlock()

		if completed {
			// containerRuntimeReadyExpected is read by updateRuntimeUp().
			// Not going for a more granular mutex as this path runs only once.
			kl.updateRuntimeMux.Lock()
			defer kl.updateRuntimeMux.Unlock()
			kl.containerRuntimeReadyExpected = true
		}
	}()

	if timeout {
		klog.ErrorS(nil, "Node not becoming ready in time after startup")
		return true
	}

	originalNode, err := kl.GetNode()
	if err != nil {
		klog.ErrorS(err, "Error getting the current node from lister")
		return false
	}

	readyIdx, originalNodeReady := nodeutil.GetNodeCondition(&originalNode.Status, v1.NodeReady)
	if readyIdx == -1 {
		klog.ErrorS(nil, "Node does not have NodeReady condition", "originalNode", originalNode)
		return false
	}

	if originalNodeReady.Status == v1.ConditionTrue {
		return true
	}

	// This is in addition to the regular syncNodeStatus logic so we can get the container runtime status earlier.
	// This function itself has a mutex and it doesn't recursively call fastNodeStatusUpdate or syncNodeStatus.
	kl.updateRuntimeUp()

	node, changed := kl.updateNode(ctx, originalNode)

	if !changed {
		// We don't do markVolumesFromNode(node) here and leave it to the regular syncNodeStatus().
		return false
	}

	readyIdx, nodeReady := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
	if readyIdx == -1 {
		klog.ErrorS(nil, "Node does not have NodeReady condition", "node", node)
		return false
	}

	if nodeReady.Status == v1.ConditionFalse {
		return false
	}

	klog.InfoS("Fast updating node status as it just became ready")
	if _, err := kl.patchNodeStatus(originalNode, node); err != nil {
		// The originalNode is probably stale, but we know that the current state of kubelet
		// would turn the node ready. Retry using syncNodeStatus() which fetches from the apiserver.
		klog.ErrorS(err, "Error updating node status, will retry with syncNodeStatus")

		// The reversed kl.syncNodeStatusMux.Unlock()/Lock() pair below allows kl.syncNodeStatus() to run.
		kl.syncNodeStatusMux.Unlock()
		kl.syncNodeStatus()
		// This lock action is unnecessary if we add a flag to check in the defer before unlocking it,
		// but having it here makes the logic a bit easier to read.
		kl.syncNodeStatusMux.Lock()
	}

	// We don't do markVolumesFromNode(node) here and leave it to the regular syncNodeStatus().
	return true
}

// syncNodeStatus should be called periodically from a goroutine.
// It synchronizes node status to master if there is any change or enough time
// passed from the last sync, registering the kubelet first if necessary.
func (kl *Kubelet) syncNodeStatus() {
	kl.syncNodeStatusMux.Lock()
	defer kl.syncNodeStatusMux.Unlock()
	ctx := context.Background()

	if kl.kubeClient == nil || kl.heartbeatClient == nil {
		return
	}
	if kl.registerNode {
		// This will exit immediately if it doesn't need to do anything.
		kl.registerWithAPIServer()
	}
	if err := kl.updateNodeStatus(ctx); err != nil {
		klog.ErrorS(err, "Unable to update node status")
	}
}

// updateNodeStatus updates node status to master with retries if there is any
// change or enough time passed from the last sync.
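// It makes up to nodeStatusUpdateRetry attempts per call and invokes the
// onRepeatedHeartbeatFailure callback (when set) on each failure after the first.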
func (kl *Kubelet) updateNodeStatus(ctx context.Context) error {
	klog.V(5).InfoS("Updating node status")
	for i := 0; i < nodeStatusUpdateRetry; i++ {
		if err := kl.tryUpdateNodeStatus(ctx, i); err != nil {
			if i > 0 && kl.onRepeatedHeartbeatFailure != nil {
				kl.onRepeatedHeartbeatFailure()
			}
			klog.ErrorS(err, "Error updating node status, will retry")
		} else {
			return nil
		}
	}
	return fmt.Errorf("update node status exceeds retry count")
}

// tryUpdateNodeStatus tries to update node status to master if there is any
// change or enough time passed from the last sync.
func (kl *Kubelet) tryUpdateNodeStatus(ctx context.Context, tryNumber int) error {
	// In large clusters, GET and PUT operations on Node objects coming
	// from here are the majority of load on apiserver and etcd.
	// To reduce the load on etcd, we are serving GET operations from the
	// apiserver cache (the data might be slightly delayed, but it doesn't
	// seem to cause more conflicts - the delays are pretty small).
	// If it results in a conflict, all retries are served directly from etcd.
	opts := metav1.GetOptions{}
	if tryNumber == 0 {
		util.FromApiserverCache(&opts)
	}
	originalNode, err := kl.heartbeatClient.CoreV1().Nodes().Get(ctx, string(kl.nodeName), opts)
	if err != nil {
		return fmt.Errorf("error getting node %q: %v", kl.nodeName, err)
	}
	if originalNode == nil {
		return fmt.Errorf("nil %q node object", kl.nodeName)
	}

	node, changed := kl.updateNode(ctx, originalNode)
	shouldPatchNodeStatus := changed || kl.clock.Since(kl.lastStatusReportTime) >= kl.nodeStatusReportFrequency

	if !shouldPatchNodeStatus {
		kl.markVolumesFromNode(node)
		return nil
	}

	updatedNode, err := kl.patchNodeStatus(originalNode, node)
	if err == nil {
		kl.markVolumesFromNode(updatedNode)
	}
	return err
}

// updateNode creates a copy of originalNode and runs update logic on it.
// It returns the updated node object and a bool indicating if anything has been changed.
func (kl *Kubelet) updateNode(ctx context.Context, originalNode *v1.Node) (*v1.Node, bool) {
	node := originalNode.DeepCopy()

	podCIDRChanged := false
	if len(node.Spec.PodCIDRs) != 0 {
		// Pod CIDR could have been updated before, so we cannot rely on
		// node.Spec.PodCIDR being non-empty. We also need to know if pod CIDR is
		// actually changed.
		var err error
		podCIDRs := strings.Join(node.Spec.PodCIDRs, ",")
		if podCIDRChanged, err = kl.updatePodCIDR(ctx, podCIDRs); err != nil {
			klog.ErrorS(err, "Error updating pod CIDR")
		}
	}

	areRequiredLabelsNotPresent := false
	osName, osLabelExists := node.Labels[v1.LabelOSStable]
	if !osLabelExists || osName != goruntime.GOOS {
		if len(node.Labels) == 0 {
			node.Labels = make(map[string]string)
		}
		node.Labels[v1.LabelOSStable] = goruntime.GOOS
		areRequiredLabelsNotPresent = true
	}
	// Set the arch if there is a mismatch
	arch, archLabelExists := node.Labels[v1.LabelArchStable]
	if !archLabelExists || arch != goruntime.GOARCH {
		if len(node.Labels) == 0 {
			node.Labels = make(map[string]string)
		}
		node.Labels[v1.LabelArchStable] = goruntime.GOARCH
		areRequiredLabelsNotPresent = true
	}

	kl.setNodeStatus(ctx, node)

	changed := podCIDRChanged || nodeStatusHasChanged(&originalNode.Status, &node.Status) || areRequiredLabelsNotPresent
	return node, changed
}

// patchNodeStatus patches node on the API server based on originalNode.
// On success it returns the updated node and refreshes the kubelet's record of
// the last reported status; otherwise it returns the error.
func (kl *Kubelet) patchNodeStatus(originalNode, node *v1.Node) (*v1.Node, error) {
	// Patch the current status on the API server
	updatedNode, _, err := nodeutil.PatchNodeStatus(kl.heartbeatClient.CoreV1(), types.NodeName(kl.nodeName), originalNode, node)
	if err != nil {
		return nil, err
	}
	kl.lastStatusReportTime = kl.clock.Now()
	kl.setLastObservedNodeAddresses(updatedNode.Status.Addresses)

	readyIdx, readyCondition := nodeutil.GetNodeCondition(&updatedNode.Status, v1.NodeReady)
	if readyIdx >= 0 && readyCondition.Status == v1.ConditionTrue {
		kl.nodeStartupLatencyTracker.RecordNodeReady()
	}

	return updatedNode, nil
}

// markVolumesFromNode updates volumeManager with VolumesInUse status from node.
//
// In the case of node status update being unnecessary, call with the fetched node.
// We must mark the volumes as ReportedInUse in volume manager's dsw even
// if no changes were made to the node status (no volumes were added or removed
// from the VolumesInUse list).
//
// The reason is that on a kubelet restart, the volume manager's dsw is
// repopulated and the volume ReportedInUse is initialized to false, while the
// VolumesInUse list from the Node object still contains the state from the
// previous kubelet instantiation.
//
// Once the volumes are added to the dsw, the ReportedInUse field needs to be
// synced from the VolumesInUse list in the Node.Status.
//
// The MarkVolumesAsReportedInUse() call cannot be performed in dsw directly
// because it does not have access to the Node object.
// This also cannot be populated on node status manager init because the volume
// may not have been added to dsw at that time.
//
// Or, after a successful node status update, call with updatedNode returned from
// the patch call, to mark the volumeInUse as reportedInUse to indicate
// those volumes are already updated in the node's status
func (kl *Kubelet) markVolumesFromNode(node *v1.Node) {
	kl.volumeManager.MarkVolumesAsReportedInUse(node.Status.VolumesInUse)
}

// recordNodeStatusEvent records an event of the given type with the given
// message for the node.
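// The event is attached to the Kubelet's nodeRef, so it is associated with the
// Node object itself (for example, visible in `kubectl describe node`).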
func (kl *Kubelet) recordNodeStatusEvent(eventType, event string) {
	klog.V(2).InfoS("Recording event message for node", "node", klog.KRef("", string(kl.nodeName)), "event", event)
	kl.recorder.Eventf(kl.nodeRef, eventType, event, "Node %s status is now: %s", kl.nodeName, event)
}

// recordEvent records an event for this node, the Kubelet's nodeRef is passed to the recorder
func (kl *Kubelet) recordEvent(eventType, event, message string) {
	kl.recorder.Eventf(kl.nodeRef, eventType, event, message)
}

// recordNodeSchedulableEvent records an event if the node's schedulable state has changed.
func (kl *Kubelet) recordNodeSchedulableEvent(ctx context.Context, node *v1.Node) error {
	kl.lastNodeUnschedulableLock.Lock()
	defer kl.lastNodeUnschedulableLock.Unlock()
	if kl.lastNodeUnschedulable != node.Spec.Unschedulable {
		if node.Spec.Unschedulable {
			kl.recordNodeStatusEvent(v1.EventTypeNormal, events.NodeNotSchedulable)
		} else {
			kl.recordNodeStatusEvent(v1.EventTypeNormal, events.NodeSchedulable)
		}
		kl.lastNodeUnschedulable = node.Spec.Unschedulable
	}
	return nil
}

// setNodeStatus fills in the Status fields of the given Node, overwriting
// any fields that are currently set.
// TODO(madhusudancs): Simplify the logic for setting node conditions and
// refactor the node status condition code out to a different file.
func (kl *Kubelet) setNodeStatus(ctx context.Context, node *v1.Node) {
	for i, f := range kl.setNodeStatusFuncs {
		klog.V(5).InfoS("Setting node status condition code", "position", i, "node", klog.KObj(node))
		if err := f(ctx, node); err != nil {
			klog.ErrorS(err, "Failed to set some node status fields", "node", klog.KObj(node))
		}
	}
}

func (kl *Kubelet) setLastObservedNodeAddresses(addresses []v1.NodeAddress) {
	kl.lastObservedNodeAddressesMux.Lock()
	defer kl.lastObservedNodeAddressesMux.Unlock()
	kl.lastObservedNodeAddresses = addresses
}

func (kl *Kubelet) getLastObservedNodeAddresses() []v1.NodeAddress {
	kl.lastObservedNodeAddressesMux.RLock()
	defer kl.lastObservedNodeAddressesMux.RUnlock()
	return kl.lastObservedNodeAddresses
}

// defaultNodeStatusFuncs is a factory that generates the default set of
// setNodeStatus funcs
func (kl *Kubelet) defaultNodeStatusFuncs() []func(context.Context, *v1.Node) error {
	// if cloud is not nil, we expect the cloud resource sync manager to exist
	var nodeAddressesFunc func() ([]v1.NodeAddress, error)
	if kl.cloud != nil {
		nodeAddressesFunc = kl.cloudResourceSyncManager.NodeAddresses
	}
	var validateHostFunc func() error
	if kl.appArmorValidator != nil {
		validateHostFunc = kl.appArmorValidator.ValidateHost
	}
	var setters []func(ctx context.Context, n *v1.Node) error
	setters = append(setters,
		nodestatus.NodeAddress(kl.nodeIPs, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc),
		nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity,
			kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent, kl.supportLocalStorageCapacityIsolation()),
		nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version),
		nodestatus.DaemonEndpoints(kl.daemonEndpoints),
		nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList),
		nodestatus.GoRuntime(),
	)
	// Volume limits
	setters = append(setters, nodestatus.VolumeLimits(kl.volumePluginMgr.ListVolumePluginWithLimits))

	setters = append(setters,
		nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent),
		nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent),
		nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent),
		nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors,
			validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent, kl.supportLocalStorageCapacityIsolation()),
		nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse),
		// TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event
		// and record state back to the Kubelet runtime object. In the future, I'd like to isolate
		// these side-effects by decoupling the decisions to send events and partial status recording
		// from the Node setters.
		kl.recordNodeSchedulableEvent,
	)
	return setters
}

// validateNodeIP validates that the given node IP belongs to the current host.
func validateNodeIP(nodeIP net.IP) error {
	// Honor IP limitations set in setNodeStatus()
	if nodeIP.To4() == nil && nodeIP.To16() == nil {
		return fmt.Errorf("nodeIP must be a valid IP address")
	}
	if nodeIP.IsLoopback() {
		return fmt.Errorf("nodeIP can't be loopback address")
	}
	if nodeIP.IsMulticast() {
		return fmt.Errorf("nodeIP can't be a multicast address")
	}
	if nodeIP.IsLinkLocalUnicast() {
		return fmt.Errorf("nodeIP can't be a link-local unicast address")
	}
	if nodeIP.IsUnspecified() {
		return fmt.Errorf("nodeIP can't be an all zeros address")
	}

	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return err
	}
	for _, addr := range addrs {
		var ip net.IP
		switch v := addr.(type) {
		case *net.IPNet:
			ip = v.IP
		case *net.IPAddr:
			ip = v.IP
		}
		if ip != nil && ip.Equal(nodeIP) {
			return nil
		}
	}
	return fmt.Errorf("node IP: %q not found in the host's network interfaces", nodeIP.String())
}

// nodeStatusHasChanged compares the original node and current node's status and
// returns true if any change happens. The heartbeat timestamp is ignored.
func nodeStatusHasChanged(originalStatus *v1.NodeStatus, status *v1.NodeStatus) bool {
	if originalStatus == nil && status == nil {
		return false
	}
	if originalStatus == nil || status == nil {
		return true
	}

	// Compare node conditions here because we need to ignore the heartbeat timestamp.
	if nodeConditionsHaveChanged(originalStatus.Conditions, status.Conditions) {
		return true
	}

	// Compare other fields of NodeStatus.
	originalStatusCopy := originalStatus.DeepCopy()
	statusCopy := status.DeepCopy()
	originalStatusCopy.Conditions = nil
	statusCopy.Conditions = nil
	return !apiequality.Semantic.DeepEqual(originalStatusCopy, statusCopy)
}

// nodeConditionsHaveChanged compares the original node and current node's
// conditions and returns true if any change happens. The heartbeat timestamp is
// ignored.
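//
// For example (illustrative, where t1 and t2 are different metav1.Time values),
// two NodeReady conditions that differ only in their heartbeat timestamps
// compare as unchanged:
//
//	a := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue, LastHeartbeatTime: t1}}
//	b := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue, LastHeartbeatTime: t2}}
//	nodeConditionsHaveChanged(a, b) // false: only the heartbeat timestamps differ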
func nodeConditionsHaveChanged(originalConditions []v1.NodeCondition, conditions []v1.NodeCondition) bool {
	if len(originalConditions) != len(conditions) {
		return true
	}

	originalConditionsCopy := make([]v1.NodeCondition, 0, len(originalConditions))
	originalConditionsCopy = append(originalConditionsCopy, originalConditions...)
	conditionsCopy := make([]v1.NodeCondition, 0, len(conditions))
	conditionsCopy = append(conditionsCopy, conditions...)

	sort.SliceStable(originalConditionsCopy, func(i, j int) bool { return originalConditionsCopy[i].Type < originalConditionsCopy[j].Type })
	sort.SliceStable(conditionsCopy, func(i, j int) bool { return conditionsCopy[i].Type < conditionsCopy[j].Type })

	replacedheartbeatTime := metav1.Time{}
	for i := range conditionsCopy {
		originalConditionsCopy[i].LastHeartbeatTime = replacedheartbeatTime
		conditionsCopy[i].LastHeartbeatTime = replacedheartbeatTime
		if !apiequality.Semantic.DeepEqual(&originalConditionsCopy[i], &conditionsCopy[i]) {
			return true
		}
	}
	return false
}