k8s.io/kubernetes@v1.29.3/pkg/kubelet/status/status_manager.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

//go:generate mockgen -source=status_manager.go -destination=testing/mock_pod_status_provider.go -package=testing PodStatusProvider
package status

import (
	"context"
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/google/go-cmp/cmp"
	clientset "k8s.io/client-go/kubernetes"

	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/klog/v2"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
	"k8s.io/kubernetes/pkg/features"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/status/state"
	kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
	kubeutil "k8s.io/kubernetes/pkg/kubelet/util"
	statusutil "k8s.io/kubernetes/pkg/util/pod"
)

// podStatusManagerStateFile is the file name where the status manager stores its state.
const podStatusManagerStateFile = "pod_status_manager_state"

// versionedPodStatus is a wrapper around v1.PodStatus that includes a version to enforce
// that stale pod statuses are not sent to the API server.
type versionedPodStatus struct {
	// version is a monotonically increasing version number (per pod).
	version uint64
	// Pod name & namespace, for sending updates to the API server.
	podName      string
	podNamespace string
	// at is the time at which the most recent status update was detected
	at time.Time

	// True if the status is generated at the end of SyncTerminatedPod, or after it is completed.
	podIsFinished bool

	status v1.PodStatus
}

// manager updates pod statuses in the apiserver. It writes only when a status has changed.
// All methods are thread-safe.
type manager struct {
	kubeClient clientset.Interface
	podManager PodManager
	// Map from pod UID to sync status of the corresponding pod.
	podStatuses      map[types.UID]versionedPodStatus
	podStatusesLock  sync.RWMutex
	podStatusChannel chan struct{}
	// Map from (mirror) pod UID to latest status version successfully sent to the API server.
	// apiStatusVersions must only be accessed from the sync thread.
	apiStatusVersions map[kubetypes.MirrorPodUID]uint64
	podDeletionSafety PodDeletionSafetyProvider

	podStartupLatencyHelper PodStartupLatencyStateHelper
	// state allows saving/restoring pod resource allocation and tolerating kubelet restarts.
	state state.State
	// stateFileDirectory holds the directory where the state file for checkpoints is held.
	stateFileDirectory string
}

// PodManager is the subset of methods the manager needs to observe the actual state of the kubelet.
// See k8s.io/kubernetes/pkg/kubelet/pod.Manager for method godoc.
type PodManager interface {
	GetPodByUID(types.UID) (*v1.Pod, bool)
	GetMirrorPodByPod(*v1.Pod) (*v1.Pod, bool)
	TranslatePodUID(uid types.UID) kubetypes.ResolvedPodUID
	GetUIDTranslations() (podToMirror map[kubetypes.ResolvedPodUID]kubetypes.MirrorPodUID, mirrorToPod map[kubetypes.MirrorPodUID]kubetypes.ResolvedPodUID)
}

// PodStatusProvider knows how to provide status for a pod. It is intended to be used by other components
// that need to introspect the authoritative status of a pod. The PodStatusProvider represents the actual
// status of a running pod as the kubelet sees it.
type PodStatusProvider interface {
	// GetPodStatus returns the cached status for the provided pod UID, as well as whether it
	// was a cache hit.
	GetPodStatus(uid types.UID) (v1.PodStatus, bool)
}

// PodDeletionSafetyProvider provides guarantees that a pod can be safely deleted.
type PodDeletionSafetyProvider interface {
	// PodCouldHaveRunningContainers returns true if the pod could have running containers.
	PodCouldHaveRunningContainers(pod *v1.Pod) bool
}

type PodStartupLatencyStateHelper interface {
	RecordStatusUpdated(pod *v1.Pod)
	DeletePodStartupState(podUID types.UID)
}

// Manager is the source of truth for kubelet pod status, and should be kept up-to-date with
// the latest v1.PodStatus. It also syncs updates back to the API server.
type Manager interface {
	PodStatusProvider

	// Start the API server status sync loop.
	Start()

	// SetPodStatus updates the cached status for the given pod, and triggers a status update.
	SetPodStatus(pod *v1.Pod, status v1.PodStatus)

	// SetContainerReadiness updates the cached container status with the given readiness, and
	// triggers a status update.
	SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool)

	// SetContainerStartup updates the cached container status with the given startup, and
	// triggers a status update.
	SetContainerStartup(podUID types.UID, containerID kubecontainer.ContainerID, started bool)

	// TerminatePod resets the container status for the provided pod to terminated and triggers
	// a status update.
	TerminatePod(pod *v1.Pod)

	// RemoveOrphanedStatuses scans the status cache and removes any entries for pods not included in
	// the provided podUIDs.
	RemoveOrphanedStatuses(podUIDs map[types.UID]bool)

	// GetContainerResourceAllocation returns the checkpointed AllocatedResources value for the container.
	GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool)

	// GetPodResizeStatus returns the checkpointed PodStatus.Resize value.
	GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)

	// SetPodAllocation checkpoints the resources allocated to a pod's containers.
	SetPodAllocation(pod *v1.Pod) error

	// SetPodResizeStatus checkpoints the last resizing decision for the pod.
	SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error
}

const syncPeriod = 10 * time.Second

// NewManager returns a functional Manager.
func NewManager(kubeClient clientset.Interface, podManager PodManager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper, stateFileDirectory string) Manager {
	return &manager{
		kubeClient:              kubeClient,
		podManager:              podManager,
		podStatuses:             make(map[types.UID]versionedPodStatus),
		podStatusChannel:        make(chan struct{}, 1),
		apiStatusVersions:       make(map[kubetypes.MirrorPodUID]uint64),
		podDeletionSafety:       podDeletionSafety,
		podStartupLatencyHelper: podStartupLatencyHelper,
		stateFileDirectory:      stateFileDirectory,
	}
}
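
// A minimal wiring sketch for a hypothetical external caller (the dependency
// values and variable names here are assumptions for illustration, not part
// of this package):
//
//	sm := status.NewManager(kubeClient, podManager, podDeletionSafety, latencyHelper, rootDir)
//	sm.Start()
//	sm.SetPodStatus(pod, generatedStatus)
//	cached, ok := sm.GetPodStatus(pod.UID)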

// isPodStatusByKubeletEqual returns true if the given pod statuses are equal when non-kubelet-owned
// pod conditions are excluded.
// This method normalizes the status before comparing to make sure that meaningless
// changes are ignored.
func isPodStatusByKubeletEqual(oldStatus, status *v1.PodStatus) bool {
	oldCopy := oldStatus.DeepCopy()
	for _, c := range status.Conditions {
		// both owned and shared conditions are used for kubelet status equality
		if kubetypes.PodConditionByKubelet(c.Type) || kubetypes.PodConditionSharedByKubelet(c.Type) {
			_, oc := podutil.GetPodCondition(oldCopy, c.Type)
			if oc == nil || oc.Status != c.Status || oc.Message != c.Message || oc.Reason != c.Reason {
				return false
			}
		}
	}
	oldCopy.Conditions = status.Conditions
	return apiequality.Semantic.DeepEqual(oldCopy, status)
}
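
// For example, two statuses that differ only in a condition type the kubelet
// neither owns nor shares (such as a custom readiness-gate condition written
// by an external controller) compare as equal here: oldCopy.Conditions is
// overwritten with status.Conditions before the semantic DeepEqual, so only
// kubelet-owned and kubelet-shared conditions influence the result.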

func (m *manager) Start() {
	// Initialize m.state to a no-op state checkpoint manager
	m.state = state.NewNoopStateCheckpoint()

	// Create the pod allocation checkpoint manager even if the client is nil, to allow local get/set of AllocatedResources & Resize
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
		stateImpl, err := state.NewStateCheckpoint(m.stateFileDirectory, podStatusManagerStateFile)
		if err != nil {
			// This is a critical, non-recoverable failure.
			klog.ErrorS(err, "Could not initialize pod allocation checkpoint manager, please drain node and remove policy state file")
			panic(err)
		}
		m.state = stateImpl
	}

	// Don't start the status manager if we don't have a client. This will happen
	// on the master, where the kubelet is responsible for bootstrapping the pods
	// of the master components.
	if m.kubeClient == nil {
		klog.InfoS("Kubernetes client is nil, not starting status manager")
		return
	}

	klog.InfoS("Starting to sync pod status with apiserver")

	//nolint:staticcheck // SA1015 Ticker can leak since this is only called once and doesn't handle termination.
	syncTicker := time.NewTicker(syncPeriod).C

	// syncPod and syncBatch share the same goroutine to avoid sync races.
	go wait.Forever(func() {
		for {
			select {
			case <-m.podStatusChannel:
				klog.V(4).InfoS("Syncing updated statuses")
				m.syncBatch(false)
			case <-syncTicker:
				klog.V(4).InfoS("Syncing all statuses")
				m.syncBatch(true)
			}
		}
	}, 0)
}

// GetContainerResourceAllocation returns the last checkpointed AllocatedResources values.
// If the checkpoint manager has not been initialized, it returns nil, false.
func (m *manager) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
	m.podStatusesLock.RLock()
	defer m.podStatusesLock.RUnlock()
	return m.state.GetContainerResourceAllocation(podUID, containerName)
}

// GetPodResizeStatus returns the last checkpointed ResizeStatus value.
// If the checkpoint manager has not been initialized, it returns an empty status, false.
func (m *manager) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
	m.podStatusesLock.RLock()
	defer m.podStatusesLock.RUnlock()
	return m.state.GetPodResizeStatus(podUID)
}

// SetPodAllocation checkpoints the resources allocated to a pod's containers.
func (m *manager) SetPodAllocation(pod *v1.Pod) error {
	m.podStatusesLock.RLock()
	defer m.podStatusesLock.RUnlock()
	for _, container := range pod.Spec.Containers {
		var alloc v1.ResourceList
		if container.Resources.Requests != nil {
			alloc = container.Resources.Requests.DeepCopy()
		}
		if err := m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc); err != nil {
			return err
		}
	}
	return nil
}

// SetPodResizeStatus checkpoints the last resizing decision for the pod.
func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
	m.podStatusesLock.RLock()
	defer m.podStatusesLock.RUnlock()
	return m.state.SetPodResizeStatus(string(podUID), resizeStatus)
}

func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {
	m.podStatusesLock.RLock()
	defer m.podStatusesLock.RUnlock()
	status, ok := m.podStatuses[types.UID(m.podManager.TranslatePodUID(uid))]
	return status.status, ok
}

func (m *manager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	// Make sure we're caching a deep copy.
	status = *status.DeepCopy()

	// Force a status update if the deletion timestamp is set. This is necessary
	// because if the pod is in the non-running state, the pod worker still
	// needs to be able to trigger an update and/or deletion.
	m.updateStatusInternal(pod, status, pod.DeletionTimestamp != nil, false)
}

func (m *manager) SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	pod, ok := m.podManager.GetPodByUID(podUID)
	if !ok {
		klog.V(4).InfoS("Pod has been deleted, no need to update readiness", "podUID", string(podUID))
		return
	}

	oldStatus, found := m.podStatuses[pod.UID]
	if !found {
		klog.InfoS("Container readiness changed before pod has synced",
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	// Find the container to update.
	containerStatus, _, ok := findContainerStatus(&oldStatus.status, containerID.String())
	if !ok {
		klog.InfoS("Container readiness changed for unknown container",
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	if containerStatus.Ready == ready {
		klog.V(4).InfoS("Container readiness unchanged",
			"ready", ready,
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	// Make sure we're not updating the cached version.
	status := *oldStatus.status.DeepCopy()
	containerStatus, _, _ = findContainerStatus(&status, containerID.String())
	containerStatus.Ready = ready

	// updateConditionFunc updates the corresponding type of condition
	updateConditionFunc := func(conditionType v1.PodConditionType, condition v1.PodCondition) {
		conditionIndex := -1
		for i, condition := range status.Conditions {
			if condition.Type == conditionType {
				conditionIndex = i
				break
			}
		}
		if conditionIndex != -1 {
			status.Conditions[conditionIndex] = condition
		} else {
			klog.InfoS("PodStatus missing condition type", "conditionType", conditionType, "status", status)
			status.Conditions = append(status.Conditions, condition)
		}
	}
	allContainerStatuses := append(status.InitContainerStatuses, status.ContainerStatuses...)
	updateConditionFunc(v1.PodReady, GeneratePodReadyCondition(&pod.Spec, status.Conditions, allContainerStatuses, status.Phase))
	updateConditionFunc(v1.ContainersReady, GenerateContainersReadyCondition(&pod.Spec, allContainerStatuses, status.Phase))
	m.updateStatusInternal(pod, status, false, false)
}
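
// A hedged illustration of the flow above: a readiness probe result for
// container c in pod p typically arrives as
//
//	m.SetContainerReadiness(p.UID, c.ID, true)
//
// which deep-copies the cached status, flips Ready on the matching
// ContainerStatus, regenerates the PodReady and ContainersReady conditions,
// and queues an API sync via updateStatusInternal.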

func (m *manager) SetContainerStartup(podUID types.UID, containerID kubecontainer.ContainerID, started bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	pod, ok := m.podManager.GetPodByUID(podUID)
	if !ok {
		klog.V(4).InfoS("Pod has been deleted, no need to update startup", "podUID", string(podUID))
		return
	}

	oldStatus, found := m.podStatuses[pod.UID]
	if !found {
		klog.InfoS("Container startup changed before pod has synced",
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	// Find the container to update.
	containerStatus, _, ok := findContainerStatus(&oldStatus.status, containerID.String())
	if !ok {
		klog.InfoS("Container startup changed for unknown container",
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	if containerStatus.Started != nil && *containerStatus.Started == started {
		klog.V(4).InfoS("Container startup unchanged",
			"pod", klog.KObj(pod),
			"containerID", containerID.String())
		return
	}

	// Make sure we're not updating the cached version.
	status := *oldStatus.status.DeepCopy()
	containerStatus, _, _ = findContainerStatus(&status, containerID.String())
	containerStatus.Started = &started

	m.updateStatusInternal(pod, status, false, false)
}

func findContainerStatus(status *v1.PodStatus, containerID string) (containerStatus *v1.ContainerStatus, init bool, ok bool) {
	// Find the container to update.
	for i, c := range status.ContainerStatuses {
		if c.ContainerID == containerID {
			return &status.ContainerStatuses[i], false, true
		}
	}

	for i, c := range status.InitContainerStatuses {
		if c.ContainerID == containerID {
			return &status.InitContainerStatuses[i], true, true
		}
	}

	return nil, false, false
}
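
// Note that findContainerStatus returns a pointer into the supplied status,
// so mutations through the returned ContainerStatus are visible in the
// caller's copy; both SetContainerReadiness and SetContainerStartup rely on
// this after deep-copying the cached status.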

// TerminatePod ensures that the status of containers is properly defaulted at the end of the pod
// lifecycle. As the Kubelet must reconcile with the container runtime to observe container status
// there is always the possibility we are unable to retrieve one or more container statuses due to
// garbage collection, admin action, or loss of temporary data on a restart. This method ensures
// that any absent container status is treated as a failure so that we do not incorrectly describe
// the pod as successful. If we have not yet initialized the pod in the presence of init containers,
// the init container failure status is sufficient to describe the pod as failing, and we do not need
// to override waiting containers (unless there is evidence the pod previously started those containers).
// It also makes sure that pods are transitioned to a terminal phase (Failed or Succeeded) before
// their deletion.
func (m *manager) TerminatePod(pod *v1.Pod) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()

	// ensure that all containers have a terminated state - because we do not know whether the container
	// was successful, always report an error
	oldStatus := &pod.Status
	cachedStatus, isCached := m.podStatuses[pod.UID]
	if isCached {
		oldStatus = &cachedStatus.status
	}
	status := *oldStatus.DeepCopy()

	// once a pod has initialized, any missing status is treated as a failure
	if hasPodInitialized(pod) {
		for i := range status.ContainerStatuses {
			if status.ContainerStatuses[i].State.Terminated != nil {
				continue
			}
			status.ContainerStatuses[i].State = v1.ContainerState{
				Terminated: &v1.ContainerStateTerminated{
					Reason:   "ContainerStatusUnknown",
					Message:  "The container could not be located when the pod was terminated",
					ExitCode: 137,
				},
			}
		}
	}

	// mark all init containers that show evidence of a container start (i.e. all but the
	// trailing suffix of never-started containers) as failed
	for i := range initializedContainers(status.InitContainerStatuses) {
		if status.InitContainerStatuses[i].State.Terminated != nil {
			continue
		}
		status.InitContainerStatuses[i].State = v1.ContainerState{
			Terminated: &v1.ContainerStateTerminated{
				Reason:   "ContainerStatusUnknown",
				Message:  "The container could not be located when the pod was terminated",
				ExitCode: 137,
			},
		}
	}

	// Make sure all pods are transitioned to a terminal phase.
	// TODO(#116484): Also assign terminal phase to static pods.
	if !kubetypes.IsStaticPod(pod) {
		switch status.Phase {
		case v1.PodSucceeded, v1.PodFailed:
			// do nothing, already terminal
		case v1.PodPending, v1.PodRunning:
			if status.Phase == v1.PodRunning && isCached {
				klog.InfoS("Terminal running pod should have already been marked as failed, programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
			}
			klog.V(3).InfoS("Marking terminal pod as failed", "oldPhase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
			status.Phase = v1.PodFailed
		default:
			klog.ErrorS(fmt.Errorf("unknown phase: %v", status.Phase), "Unknown phase, programmer error", "pod", klog.KObj(pod), "podUID", pod.UID)
			status.Phase = v1.PodFailed
		}
	}

	klog.V(5).InfoS("TerminatePod calling updateStatusInternal", "pod", klog.KObj(pod), "podUID", pod.UID)
	m.updateStatusInternal(pod, status, true, true)
}
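
// Note: the synthesized ContainerStatusUnknown termination above uses exit
// code 137 (128 + SIGKILL), matching what a runtime reports for a killed
// container, so downstream consumers see a conventional failure code rather
// than a zero exit.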

// hasPodInitialized returns true if the pod shows evidence of having initialized: it has no
// init containers, a regular container has ever run or left the waiting state, or its final
// init container has completed. When it returns false, waiting regular containers should not
// be transitioned to terminated status.
func hasPodInitialized(pod *v1.Pod) bool {
	// a pod without init containers is always initialized
	if len(pod.Spec.InitContainers) == 0 {
		return true
	}
	// if any container has ever moved out of waiting state, the pod has initialized
	for _, status := range pod.Status.ContainerStatuses {
		if status.LastTerminationState.Terminated != nil || status.State.Waiting == nil {
			return true
		}
	}
	// if the last init container has ever completed with a zero exit code, the pod is initialized
	if l := len(pod.Status.InitContainerStatuses); l > 0 {
		container, ok := kubeutil.GetContainerByIndex(pod.Spec.InitContainers, pod.Status.InitContainerStatuses, l-1)
		if !ok {
			klog.V(4).InfoS("Mismatch between pod spec and status, likely programmer error", "pod", klog.KObj(pod), "containerName", container.Name)
			return false
		}

		containerStatus := pod.Status.InitContainerStatuses[l-1]
		if kubetypes.IsRestartableInitContainer(&container) {
			if containerStatus.State.Running != nil &&
				containerStatus.Started != nil && *containerStatus.Started {
				return true
			}
		} else { // regular init container
			if state := containerStatus.LastTerminationState; state.Terminated != nil && state.Terminated.ExitCode == 0 {
				return true
			}
			if state := containerStatus.State; state.Terminated != nil && state.Terminated.ExitCode == 0 {
				return true
			}
		}
	}
	// otherwise the pod has no record of being initialized
	return false
}

// initializedContainers returns all statuses except for the trailing suffix of containers that
// are still in the Waiting state, i.e. the set of containers that have attempted to start at
// least once. If all containers are Waiting, the first container is always returned.
func initializedContainers(containers []v1.ContainerStatus) []v1.ContainerStatus {
	for i := len(containers) - 1; i >= 0; i-- {
		if containers[i].State.Waiting == nil || containers[i].LastTerminationState.Terminated != nil {
			return containers[0 : i+1]
		}
	}
	// always return at least one container
	if len(containers) > 0 {
		return containers[0:1]
	}
	return nil
}
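
// A worked example (assumed states): for init container statuses
// [Terminated, Waiting, Waiting] only the first shows evidence of starting,
// so the function returns containers[0:1]; for [Terminated, Running, Waiting]
// it returns containers[0:2]; and for [Waiting, Waiting] it still returns
// containers[0:1], because at least one container is always reported.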

// checkContainerStateTransition ensures that no container is trying to transition
// from a terminated to a non-terminated state, which is illegal and indicates a
// logical error in the kubelet.
func checkContainerStateTransition(oldStatuses, newStatuses *v1.PodStatus, podSpec *v1.PodSpec) error {
	// If we should always restart, containers are allowed to leave the terminated state
	if podSpec.RestartPolicy == v1.RestartPolicyAlways {
		return nil
	}
	for _, oldStatus := range oldStatuses.ContainerStatuses {
		// Skip any container that wasn't terminated
		if oldStatus.State.Terminated == nil {
			continue
		}
		// Skip any container that failed but is allowed to restart
		if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure {
			continue
		}
		for _, newStatus := range newStatuses.ContainerStatuses {
			if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil {
				return fmt.Errorf("terminated container %v attempted illegal transition to non-terminated state", newStatus.Name)
			}
		}
	}

	for i, oldStatus := range oldStatuses.InitContainerStatuses {
		initContainer, ok := kubeutil.GetContainerByIndex(podSpec.InitContainers, oldStatuses.InitContainerStatuses, i)
		if !ok {
			return fmt.Errorf("found mismatch between pod spec and status, container: %v", oldStatus.Name)
		}
		// Skip any restartable init container as it is always allowed to restart
		if kubetypes.IsRestartableInitContainer(&initContainer) {
			continue
		}
		// Skip any container that wasn't terminated
		if oldStatus.State.Terminated == nil {
			continue
		}
		// Skip any container that failed but is allowed to restart
		if oldStatus.State.Terminated.ExitCode != 0 && podSpec.RestartPolicy == v1.RestartPolicyOnFailure {
			continue
		}
		for _, newStatus := range newStatuses.InitContainerStatuses {
			if oldStatus.Name == newStatus.Name && newStatus.State.Terminated == nil {
				return fmt.Errorf("terminated init container %v attempted illegal transition to non-terminated state", newStatus.Name)
			}
		}
	}
	return nil
}
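
// For instance, with RestartPolicyNever an old status of Terminated{ExitCode: 0}
// for container "app" paired with a new non-terminated status for the same name
// yields an error; with RestartPolicyAlways the check is skipped entirely; and
// with RestartPolicyOnFailure only zero-exit-code terminations are protected.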

// updateStatusInternal updates the internal status cache, and queues an update to the api server if
// necessary.
// This method IS NOT THREAD SAFE and must be called from a locked function.
func (m *manager) updateStatusInternal(pod *v1.Pod, status v1.PodStatus, forceUpdate, podIsFinished bool) {
	var oldStatus v1.PodStatus
	cachedStatus, isCached := m.podStatuses[pod.UID]
	if isCached {
		oldStatus = cachedStatus.status
		// TODO(#116484): Also assign terminal phase to static pods.
		if !kubetypes.IsStaticPod(pod) {
			if cachedStatus.podIsFinished && !podIsFinished {
				klog.InfoS("Got unexpected podIsFinished=false, while podIsFinished=true in status cache, programmer error.", "pod", klog.KObj(pod))
				podIsFinished = true
			}
		}
	} else if mirrorPod, ok := m.podManager.GetMirrorPodByPod(pod); ok {
		oldStatus = mirrorPod.Status
	} else {
		oldStatus = pod.Status
	}

	// Check for illegal state transition in containers
	if err := checkContainerStateTransition(&oldStatus, &status, &pod.Spec); err != nil {
		klog.ErrorS(err, "Status update on pod aborted", "pod", klog.KObj(pod))
		return
	}

	// Set ContainersReadyCondition.LastTransitionTime.
	updateLastTransitionTime(&status, &oldStatus, v1.ContainersReady)

	// Set ReadyCondition.LastTransitionTime.
	updateLastTransitionTime(&status, &oldStatus, v1.PodReady)

	// Set InitializedCondition.LastTransitionTime.
	updateLastTransitionTime(&status, &oldStatus, v1.PodInitialized)

	// Set PodReadyToStartContainersCondition.LastTransitionTime.
	updateLastTransitionTime(&status, &oldStatus, v1.PodReadyToStartContainers)

	// Set PodScheduledCondition.LastTransitionTime.
	updateLastTransitionTime(&status, &oldStatus, v1.PodScheduled)

	if utilfeature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions) {
		// Set DisruptionTarget.LastTransitionTime.
		updateLastTransitionTime(&status, &oldStatus, v1.DisruptionTarget)
	}

	// ensure that the start time does not change across updates.
	if oldStatus.StartTime != nil && !oldStatus.StartTime.IsZero() {
		status.StartTime = oldStatus.StartTime
	} else if status.StartTime.IsZero() {
		// if the status has no start time, we need to set an initial time
		now := metav1.Now()
		status.StartTime = &now
	}

	normalizeStatus(pod, &status)

	// Perform some more extensive logging of container termination state to assist in
	// debugging production races (generally not needed).
	if klogV := klog.V(5); klogV.Enabled() {
		var containers []string
		for _, s := range append(append([]v1.ContainerStatus(nil), status.InitContainerStatuses...), status.ContainerStatuses...) {
			var current, previous string
			switch {
			case s.State.Running != nil:
				current = "running"
			case s.State.Waiting != nil:
				current = "waiting"
			case s.State.Terminated != nil:
				current = fmt.Sprintf("terminated=%d", s.State.Terminated.ExitCode)
			default:
				current = "unknown"
			}
			switch {
			case s.LastTerminationState.Running != nil:
				previous = "running"
			case s.LastTerminationState.Waiting != nil:
				previous = "waiting"
			case s.LastTerminationState.Terminated != nil:
				previous = fmt.Sprintf("terminated=%d", s.LastTerminationState.Terminated.ExitCode)
			default:
				previous = "<none>"
			}
			containers = append(containers, fmt.Sprintf("(%s state=%s previous=%s)", s.Name, current, previous))
		}
		sort.Strings(containers)
		klogV.InfoS("updateStatusInternal", "version", cachedStatus.version+1, "podIsFinished", podIsFinished, "pod", klog.KObj(pod), "podUID", pod.UID, "containers", strings.Join(containers, " "))
	}

	// The intent here is to prevent concurrent updates to a pod's status from
	// clobbering each other so the phase of a pod progresses monotonically.
	if isCached && isPodStatusByKubeletEqual(&cachedStatus.status, &status) && !forceUpdate {
		klog.V(3).InfoS("Ignoring same status for pod", "pod", klog.KObj(pod), "status", status)
		return
	}

	newStatus := versionedPodStatus{
		status:        status,
		version:       cachedStatus.version + 1,
		podName:       pod.Name,
		podNamespace:  pod.Namespace,
		podIsFinished: podIsFinished,
	}

	// Multiple status updates can be generated before we update the API server,
	// so we track the time from the first status update until we retire it to
	// the API.
	if cachedStatus.at.IsZero() {
		newStatus.at = time.Now()
	} else {
		newStatus.at = cachedStatus.at
	}

	m.podStatuses[pod.UID] = newStatus

	select {
	case m.podStatusChannel <- struct{}{}:
	default:
		// there's already a status update pending
	}
}

// updateLastTransitionTime updates the LastTransitionTime of a pod condition.
func updateLastTransitionTime(status, oldStatus *v1.PodStatus, conditionType v1.PodConditionType) {
	_, condition := podutil.GetPodCondition(status, conditionType)
	if condition == nil {
		return
	}
	// Need to set LastTransitionTime.
	lastTransitionTime := metav1.Now()
	_, oldCondition := podutil.GetPodCondition(oldStatus, conditionType)
	if oldCondition != nil && condition.Status == oldCondition.Status {
		lastTransitionTime = oldCondition.LastTransitionTime
	}
	condition.LastTransitionTime = lastTransitionTime
}
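
// In other words, a condition whose Status is unchanged keeps the previous
// LastTransitionTime, and only a genuine Status flip is stamped with
// metav1.Now().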

// deletePodStatus simply removes the given pod from the status cache.
func (m *manager) deletePodStatus(uid types.UID) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()
	delete(m.podStatuses, uid)
	m.podStartupLatencyHelper.DeletePodStartupState(uid)
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
		m.state.Delete(string(uid), "")
	}
}

// TODO(filipg): It'd be cleaner if we can do this without signal from user.
func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
	m.podStatusesLock.Lock()
	defer m.podStatusesLock.Unlock()
	for key := range m.podStatuses {
		if _, ok := podUIDs[key]; !ok {
			klog.V(5).InfoS("Removing pod from status map.", "podUID", key)
			delete(m.podStatuses, key)
			if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
				m.state.Delete(string(key), "")
			}
		}
	}
}
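
// Callers pass the full set of pods the kubelet still knows about, e.g.
// (a hedged sketch; activePod is a hypothetical variable):
//
//	m.RemoveOrphanedStatuses(map[types.UID]bool{activePod.UID: true})
//
// and every cached status (and, when InPlacePodVerticalScaling is enabled,
// every checkpoint entry) outside that set is dropped.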

// syncBatch syncs pod statuses with the apiserver. It returns the number of syncs
// attempted for testing.
func (m *manager) syncBatch(all bool) int {
	type podSync struct {
		podUID    types.UID
		statusUID kubetypes.MirrorPodUID
		status    versionedPodStatus
	}

	var updatedStatuses []podSync
	podToMirror, mirrorToPod := m.podManager.GetUIDTranslations()
	func() { // Critical section
		m.podStatusesLock.RLock()
		defer m.podStatusesLock.RUnlock()

		// Clean up orphaned versions.
		if all {
			for uid := range m.apiStatusVersions {
				_, hasPod := m.podStatuses[types.UID(uid)]
				_, hasMirror := mirrorToPod[uid]
				if !hasPod && !hasMirror {
					delete(m.apiStatusVersions, uid)
				}
			}
		}

		// Decide which pods need status updates.
		for uid, status := range m.podStatuses {
			// translate the pod UID (source) to the status UID (API pod) -
			// static pods are identified in source by pod UID but tracked in the
			// API via the uid of the mirror pod
			uidOfStatus := kubetypes.MirrorPodUID(uid)
			if mirrorUID, ok := podToMirror[kubetypes.ResolvedPodUID(uid)]; ok {
				if mirrorUID == "" {
					klog.V(5).InfoS("Static pod does not have a corresponding mirror pod; skipping",
						"podUID", uid,
						"pod", klog.KRef(status.podNamespace, status.podName))
					continue
				}
				uidOfStatus = mirrorUID
			}

			// if a new status update has been delivered, trigger an update, otherwise the
			// pod can wait for the next bulk check (which performs reconciliation as well)
			if !all {
				if m.apiStatusVersions[uidOfStatus] >= status.version {
					continue
				}
				updatedStatuses = append(updatedStatuses, podSync{uid, uidOfStatus, status})
				continue
			}

			// Ensure that any new status, or mismatched status, or pod that is ready for
			// deletion gets updated. If a status update fails we retry the next time any
			// other pod is updated.
			if m.needsUpdate(types.UID(uidOfStatus), status) {
				updatedStatuses = append(updatedStatuses, podSync{uid, uidOfStatus, status})
			} else if m.needsReconcile(uid, status.status) {
				// Delete the apiStatusVersions entry here to force an update on the pod status.
				// In most cases the deleted entry should be repopulated soon after the
				// following syncPod() (if syncPod() syncs an update successfully).
				delete(m.apiStatusVersions, uidOfStatus)
				updatedStatuses = append(updatedStatuses, podSync{uid, uidOfStatus, status})
			}
		}
	}()

	for _, update := range updatedStatuses {
		klog.V(5).InfoS("Sync pod status", "podUID", update.podUID, "statusUID", update.statusUID, "version", update.status.version)
		m.syncPod(update.podUID, update.status)
	}

	return len(updatedStatuses)
}

// syncPod syncs the given status with the API server. The caller must not hold the status lock.
func (m *manager) syncPod(uid types.UID, status versionedPodStatus) {
	// TODO: make me easier to express from client code
	pod, err := m.kubeClient.CoreV1().Pods(status.podNamespace).Get(context.TODO(), status.podName, metav1.GetOptions{})
	if errors.IsNotFound(err) {
		klog.V(3).InfoS("Pod does not exist on the server",
			"podUID", uid,
			"pod", klog.KRef(status.podNamespace, status.podName))
		// If the Pod is deleted the status will be cleared in
		// RemoveOrphanedStatuses, so we just ignore the update here.
		return
	}
	if err != nil {
		klog.InfoS("Failed to get status for pod",
			"podUID", uid,
			"pod", klog.KRef(status.podNamespace, status.podName),
			"err", err)
		return
	}

	translatedUID := m.podManager.TranslatePodUID(pod.UID)
	// Type-convert the original UID just for the purpose of comparison.
	if len(translatedUID) > 0 && translatedUID != kubetypes.ResolvedPodUID(uid) {
		klog.V(2).InfoS("Pod was deleted and then recreated, skipping status update",
			"pod", klog.KObj(pod),
			"oldPodUID", uid,
			"podUID", translatedUID)
		m.deletePodStatus(uid)
		return
	}

	mergedStatus := mergePodStatus(pod.Status, status.status, m.podDeletionSafety.PodCouldHaveRunningContainers(pod))

	newPod, patchBytes, unchanged, err := statusutil.PatchPodStatus(context.TODO(), m.kubeClient, pod.Namespace, pod.Name, pod.UID, pod.Status, mergedStatus)
	klog.V(3).InfoS("Patch status for pod", "pod", klog.KObj(pod), "podUID", uid, "patch", string(patchBytes))

	if err != nil {
		klog.InfoS("Failed to update status for pod", "pod", klog.KObj(pod), "err", err)
		return
	}
	if unchanged {
		klog.V(3).InfoS("Status for pod is up-to-date", "pod", klog.KObj(pod), "statusVersion", status.version)
	} else {
		klog.V(3).InfoS("Status for pod updated successfully", "pod", klog.KObj(pod), "statusVersion", status.version, "status", mergedStatus)
		pod = newPod
		// We pass a new object (the result of the API call, which contains the updated ResourceVersion).
		m.podStartupLatencyHelper.RecordStatusUpdated(pod)
	}

	// measure how long the status update took to propagate from generation to update on the server
	if status.at.IsZero() {
		klog.V(3).InfoS("Pod had no status time set", "pod", klog.KObj(pod), "podUID", uid, "version", status.version)
	} else {
		duration := time.Since(status.at).Truncate(time.Millisecond)
		metrics.PodStatusSyncDuration.Observe(duration.Seconds())
	}

	m.apiStatusVersions[kubetypes.MirrorPodUID(pod.UID)] = status.version

	// We don't handle graceful deletion of mirror pods.
	if m.canBeDeleted(pod, status.status, status.podIsFinished) {
		deleteOptions := metav1.DeleteOptions{
			GracePeriodSeconds: new(int64),
			// Use the pod UID as the precondition for deletion to prevent deleting a
			// newly created pod with the same name and namespace.
			Preconditions: metav1.NewUIDPreconditions(string(pod.UID)),
		}
		err = m.kubeClient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, deleteOptions)
		if err != nil {
			klog.InfoS("Failed to delete status for pod", "pod", klog.KObj(pod), "err", err)
			return
		}
		klog.V(3).InfoS("Pod fully terminated and removed from etcd", "pod", klog.KObj(pod))
		m.deletePodStatus(uid)
	}
}

// needsUpdate returns whether the status is stale for the given pod UID.
// This method is not thread safe, and must only be accessed by the sync thread.
func (m *manager) needsUpdate(uid types.UID, status versionedPodStatus) bool {
	latest, ok := m.apiStatusVersions[kubetypes.MirrorPodUID(uid)]
	if !ok || latest < status.version {
		return true
	}
	pod, ok := m.podManager.GetPodByUID(uid)
	if !ok {
		return false
	}
	return m.canBeDeleted(pod, status.status, status.podIsFinished)
}

func (m *manager) canBeDeleted(pod *v1.Pod, status v1.PodStatus, podIsFinished bool) bool {
	if pod.DeletionTimestamp == nil || kubetypes.IsMirrorPod(pod) {
		return false
	}
	// Delay deletion of pods until the phase is terminal, based on pod.Status
	// which comes from pod manager.
	if !podutil.IsPodPhaseTerminal(pod.Status.Phase) {
		// For debugging purposes we also log the kubelet's local phase, when the deletion is delayed.
		klog.V(3).InfoS("Delaying pod deletion as the phase is non-terminal", "phase", pod.Status.Phase, "localPhase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
		return false
	}
	// If this is an update completing pod termination then we know the pod termination is finished.
	if podIsFinished {
		klog.V(3).InfoS("The pod termination is finished as SyncTerminatedPod completes its execution", "phase", pod.Status.Phase, "localPhase", status.Phase, "pod", klog.KObj(pod), "podUID", pod.UID)
		return true
	}
	return false
}

// needsReconcile compares the given status with the status in the pod manager (which
// in fact comes from the apiserver), and returns whether the status needs to be
// reconciled with the apiserver. When pod status is inconsistent between the apiserver
// and the kubelet, the kubelet should forcibly send an update to reconcile the
// inconsistency, because the kubelet should be the source of truth for pod status.
// NOTE(random-liu): It's simpler to pass in mirror pod uid and get mirror pod by uid, but
// now the pod manager only supports getting mirror pod by static pod, so we have to pass
// static pod uid here.
// TODO(random-liu): Simplify the logic when mirror pod manager is added.
func (m *manager) needsReconcile(uid types.UID, status v1.PodStatus) bool {
	// The pod could be a static pod, so we should translate first.
	pod, ok := m.podManager.GetPodByUID(uid)
	if !ok {
		klog.V(4).InfoS("Pod has been deleted, no need to reconcile", "podUID", string(uid))
		return false
	}
	// If the pod is a static pod, we should check its mirror pod, because only the status in the mirror pod is meaningful to us.
	if kubetypes.IsStaticPod(pod) {
		mirrorPod, ok := m.podManager.GetMirrorPodByPod(pod)
		if !ok {
			klog.V(4).InfoS("Static pod has no corresponding mirror pod, no need to reconcile", "pod", klog.KObj(pod))
			return false
		}
		pod = mirrorPod
	}

	podStatus := pod.Status.DeepCopy()
	normalizeStatus(pod, podStatus)

	if isPodStatusByKubeletEqual(podStatus, &status) {
		// If the status from the source is the same as the cached status,
		// reconciliation is not needed. Just return.
		return false
	}
	klog.V(3).InfoS("Pod status is inconsistent with cached status for pod, a reconciliation should be triggered",
		"pod", klog.KObj(pod),
		"statusDiff", cmp.Diff(podStatus, &status))

	return true
}

// normalizeStatus normalizes nanosecond precision timestamps in podStatus
// down to second precision (*RFC3339NANO* -> *RFC3339*). This must be done
// before comparing podStatus to the status returned by the apiserver because
// the apiserver does not support RFC3339NANO.
// Related issue #15262/PR #15263 to move the apiserver to RFC3339NANO is closed.
func normalizeStatus(pod *v1.Pod, status *v1.PodStatus) *v1.PodStatus {
	bytesPerStatus := kubecontainer.MaxPodTerminationMessageLogLength
	if containers := len(pod.Spec.Containers) + len(pod.Spec.InitContainers); containers > 0 {
		bytesPerStatus = bytesPerStatus / containers
	}
	normalizeTimeStamp := func(t *metav1.Time) {
		*t = t.Rfc3339Copy()
	}
	normalizeContainerState := func(c *v1.ContainerState) {
		if c.Running != nil {
			normalizeTimeStamp(&c.Running.StartedAt)
		}
		if c.Terminated != nil {
			normalizeTimeStamp(&c.Terminated.StartedAt)
			normalizeTimeStamp(&c.Terminated.FinishedAt)
			if len(c.Terminated.Message) > bytesPerStatus {
				c.Terminated.Message = c.Terminated.Message[:bytesPerStatus]
			}
		}
	}

	if status.StartTime != nil {
		normalizeTimeStamp(status.StartTime)
	}
	for i := range status.Conditions {
		condition := &status.Conditions[i]
		normalizeTimeStamp(&condition.LastProbeTime)
		normalizeTimeStamp(&condition.LastTransitionTime)
	}

	// update container statuses
	for i := range status.ContainerStatuses {
		cstatus := &status.ContainerStatuses[i]
		normalizeContainerState(&cstatus.State)
		normalizeContainerState(&cstatus.LastTerminationState)
	}
	// Sort the container statuses, so that the order won't affect the result of comparison
	sort.Sort(kubetypes.SortedContainerStatuses(status.ContainerStatuses))

	// update init container statuses
	for i := range status.InitContainerStatuses {
		cstatus := &status.InitContainerStatuses[i]
		normalizeContainerState(&cstatus.State)
		normalizeContainerState(&cstatus.LastTerminationState)
	}
	// Sort the init container statuses, so that the order won't affect the result of comparison
	kubetypes.SortInitContainerStatuses(pod, status.InitContainerStatuses)
	return status
}
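
// As a concrete example, a Running.StartedAt of 2024-01-02T03:04:05.678901234Z
// normalizes to 2024-01-02T03:04:05Z, and for a pod with, say, two containers
// each termination message is truncated to half of
// kubecontainer.MaxPodTerminationMessageLogLength.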

// mergePodStatus merges oldPodStatus and newPodStatus, preserving pod conditions not owned
// by the kubelet and ensuring the terminal phase transition only happens after all running
// containers have terminated. This method does not modify the old status.
func mergePodStatus(oldPodStatus, newPodStatus v1.PodStatus, couldHaveRunningContainers bool) v1.PodStatus {
	podConditions := make([]v1.PodCondition, 0, len(oldPodStatus.Conditions)+len(newPodStatus.Conditions))

	for _, c := range oldPodStatus.Conditions {
		if !kubetypes.PodConditionByKubelet(c.Type) {
			podConditions = append(podConditions, c)
		}
	}

	transitioningToTerminalPhase := !podutil.IsPodPhaseTerminal(oldPodStatus.Phase) && podutil.IsPodPhaseTerminal(newPodStatus.Phase)

	for _, c := range newPodStatus.Conditions {
		if kubetypes.PodConditionByKubelet(c.Type) {
			podConditions = append(podConditions, c)
		} else if kubetypes.PodConditionSharedByKubelet(c.Type) {
			// we replace or append all the "shared by kubelet" conditions
			if c.Type == v1.DisruptionTarget {
				// guard the update of the DisruptionTarget condition with a check to ensure
				// it will only be sent once all containers have terminated and the phase
				// is terminal. This avoids sending an unnecessary patch request to add
				// the condition if the actual status phase transition is delayed.
				if transitioningToTerminalPhase && !couldHaveRunningContainers {
					// update the LastTransitionTime again here because the older transition
					// time set in updateStatusInternal is likely stale as sending of
					// the condition was delayed until all of the pod's containers have terminated.
					updateLastTransitionTime(&newPodStatus, &oldPodStatus, c.Type)
					if _, c := podutil.GetPodConditionFromList(newPodStatus.Conditions, c.Type); c != nil {
						// for shared conditions we update or append in podConditions
						podConditions = statusutil.ReplaceOrAppendPodCondition(podConditions, c)
					}
				}
			}
		}
	}
	newPodStatus.Conditions = podConditions

	// ResourceClaimStatuses is not owned and not modified by kubelet.
	newPodStatus.ResourceClaimStatuses = oldPodStatus.ResourceClaimStatuses

	// Delay transitioning a pod to a terminal status unless the pod is actually terminal.
	// The Kubelet should never transition a pod to terminal status that could have running
	// containers and thus actively be leveraging exclusive resources. Note that resources
	// like volumes are reconciled by a subsystem in the Kubelet and will converge if a new
	// pod reuses an exclusive resource (unmount -> free -> mount), which means we do not
	// need to wait for those resources to be detached by the Kubelet. In general, resources
	// the Kubelet exclusively owns must be released prior to a pod being reported terminal,
	// while resources that have participating components above the API use the pod's
	// transition to a terminal phase (or full deletion) to release those resources.
	if transitioningToTerminalPhase {
		if couldHaveRunningContainers {
			newPodStatus.Phase = oldPodStatus.Phase
			newPodStatus.Reason = oldPodStatus.Reason
			newPodStatus.Message = oldPodStatus.Message
		}
	}

	// If the new phase is terminal, explicitly set the ready condition to false for v1.PodReady and v1.ContainersReady.
	// It may take some time for kubelet to reconcile the ready condition, so explicitly set ready conditions to false if the phase is terminal.
	// This is done to ensure kubelet does not report a status update with terminal pod phase and ready=true.
	// See https://issues.k8s.io/108594 for more details.
	if podutil.IsPodPhaseTerminal(newPodStatus.Phase) {
		if podutil.IsPodReadyConditionTrue(newPodStatus) || podutil.IsContainersReadyConditionTrue(newPodStatus) {
			containersReadyCondition := generateContainersReadyConditionForTerminalPhase(newPodStatus.Phase)
			podutil.UpdatePodCondition(&newPodStatus, &containersReadyCondition)

			podReadyCondition := generatePodReadyConditionForTerminalPhase(newPodStatus.Phase)
			podutil.UpdatePodCondition(&newPodStatus, &podReadyCondition)
		}
	}

	return newPodStatus
}
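
// Example of the phase guard above: if the kubelet reports
// newPodStatus.Phase=Failed while couldHaveRunningContainers is still true,
// the merged status keeps the old non-terminal Phase (and Reason/Message)
// until the runtime confirms that every container has stopped.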

// NeedToReconcilePodReadiness returns whether the pod "Ready" condition needs to be reconciled.
func NeedToReconcilePodReadiness(pod *v1.Pod) bool {
	if len(pod.Spec.ReadinessGates) == 0 {
		return false
	}
	podReadyCondition := GeneratePodReadyCondition(&pod.Spec, pod.Status.Conditions, pod.Status.ContainerStatuses, pod.Status.Phase)
	i, curCondition := podutil.GetPodConditionFromList(pod.Status.Conditions, v1.PodReady)
	// Only reconcile if the "Ready" condition is present and its Status or Message is not what we expect.
	if i >= 0 && (curCondition.Status != podReadyCondition.Status || curCondition.Message != podReadyCondition.Message) {
		return true
	}
	return false
}