k8s.io/kubernetes@v1.29.3/pkg/kubelet/kuberuntime/kuberuntime_manager.go (about)

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package kuberuntime
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"sort"
    26  	"time"
    27  
    28  	cadvisorapi "github.com/google/cadvisor/info/v1"
    29  	"github.com/google/go-cmp/cmp"
    30  	"go.opentelemetry.io/otel/trace"
    31  	crierror "k8s.io/cri-api/pkg/errors"
    32  	"k8s.io/klog/v2"
    33  
    34  	v1 "k8s.io/api/core/v1"
    35  	"k8s.io/apimachinery/pkg/api/resource"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	kubetypes "k8s.io/apimachinery/pkg/types"
    38  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    39  	utilversion "k8s.io/apimachinery/pkg/util/version"
    40  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    41  	"k8s.io/client-go/tools/record"
    42  	ref "k8s.io/client-go/tools/reference"
    43  	"k8s.io/client-go/util/flowcontrol"
    44  	"k8s.io/component-base/logs/logreduction"
    45  	internalapi "k8s.io/cri-api/pkg/apis"
    46  	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
    47  
    48  	"k8s.io/kubernetes/pkg/api/legacyscheme"
    49  	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
    50  	"k8s.io/kubernetes/pkg/credentialprovider"
    51  	"k8s.io/kubernetes/pkg/credentialprovider/plugin"
    52  	"k8s.io/kubernetes/pkg/features"
    53  	"k8s.io/kubernetes/pkg/kubelet/cm"
    54  	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
    55  	"k8s.io/kubernetes/pkg/kubelet/events"
    56  	"k8s.io/kubernetes/pkg/kubelet/images"
    57  	runtimeutil "k8s.io/kubernetes/pkg/kubelet/kuberuntime/util"
    58  	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
    59  	"k8s.io/kubernetes/pkg/kubelet/logs"
    60  	"k8s.io/kubernetes/pkg/kubelet/metrics"
    61  	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
    62  	"k8s.io/kubernetes/pkg/kubelet/runtimeclass"
    63  	"k8s.io/kubernetes/pkg/kubelet/sysctl"
    64  	"k8s.io/kubernetes/pkg/kubelet/types"
    65  	"k8s.io/kubernetes/pkg/kubelet/util/cache"
    66  	"k8s.io/kubernetes/pkg/kubelet/util/format"
    67  	sc "k8s.io/kubernetes/pkg/securitycontext"
    68  )
    69  
    70  const (
    71  	// The API version of the kubelet runtime API
    72  	kubeRuntimeAPIVersion = "0.1.0"
    73  	// The root directory for pod logs
    74  	podLogsRootDirectory = "/var/log/pods"
    75  	// A minimal shutdown window for avoiding unnecessary SIGKILLs
    76  	minimumGracePeriodInSeconds = 2
    77  
    78  	// The expiration time of the version cache.
    79  	versionCacheTTL = 60 * time.Second
    80  	// How frequently to report identical errors
    81  	identicalErrorDelay = 1 * time.Minute
    82  	// OpenTelemetry instrumentation scope name
    83  	instrumentationScope = "k8s.io/kubernetes/pkg/kubelet/kuberuntime"
    84  )
    85  
    86  var (
    87  	// ErrVersionNotSupported is returned when the API version of the runtime interface is not supported
    88  	ErrVersionNotSupported = errors.New("runtime api version is not supported")
    89  )
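        
        // Callers can test for this failure mode with errors.Is; a minimal,
        // hypothetical caller-side check (not part of this file) might be:
        //
        //	if _, err := NewKubeGenericRuntimeManager( /* ... */ ); errors.Is(err, ErrVersionNotSupported) {
        //		// Surface a clearer node condition rather than retrying blindly.
        //	}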
    90  
    91  // podStateProvider can determine whether a pod's content still needs to be
    92  // retained, or whether its runtime elements (containers) do.
    93  type podStateProvider interface {
    94  	IsPodTerminationRequested(kubetypes.UID) bool
    95  	ShouldPodContentBeRemoved(kubetypes.UID) bool
    96  	ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
    97  }
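        
        // A minimal fake of this interface for unit tests might look like the
        // following (hypothetical; in the kubelet the pod workers provide the real
        // implementation):
        //
        //	type fakePodStateProvider struct{ terminated map[kubetypes.UID]bool }
        //
        //	func (f *fakePodStateProvider) IsPodTerminationRequested(uid kubetypes.UID) bool { return f.terminated[uid] }
        //	func (f *fakePodStateProvider) ShouldPodContentBeRemoved(uid kubetypes.UID) bool  { return f.terminated[uid] }
        //	func (f *fakePodStateProvider) ShouldPodRuntimeBeRemoved(uid kubetypes.UID) bool  { return f.terminated[uid] }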
    98  
    99  type kubeGenericRuntimeManager struct {
   100  	runtimeName string
   101  	recorder    record.EventRecorder
   102  	osInterface kubecontainer.OSInterface
   103  
   104  	// machineInfo contains the machine information.
   105  	machineInfo *cadvisorapi.MachineInfo
   106  
   107  	// Container GC manager
   108  	containerGC *containerGC
   109  
   110  	// Keyring for pulling images
   111  	keyring credentialprovider.DockerKeyring
   112  
   113  	// Runner of lifecycle events.
   114  	runner kubecontainer.HandlerRunner
   115  
   116  	// RuntimeHelper that wraps kubelet to generate runtime container options.
   117  	runtimeHelper kubecontainer.RuntimeHelper
   118  
   119  	// Health check results.
   120  	livenessManager  proberesults.Manager
   121  	readinessManager proberesults.Manager
   122  	startupManager   proberesults.Manager
   123  
   124  	// If true, enforce container cpu limits with CFS quota support
   125  	cpuCFSQuota bool
   126  
   127  	// CPUCFSQuotaPeriod sets the CPU CFS quota period value, cpu.cfs_period_us, defaults to 100ms
   128  	cpuCFSQuotaPeriod metav1.Duration
   129  
   130  	// wrapped image puller.
   131  	imagePuller images.ImageManager
   132  
   133  	// gRPC service clients
   134  	runtimeService internalapi.RuntimeService
   135  	imageService   internalapi.ImageManagerService
   136  
   137  	// The version cache of the runtime daemon.
   138  	versionCache *cache.ObjectCache
   139  
   140  	// The directory path for seccomp profiles.
   141  	seccompProfileRoot string
   142  
   143  	// Container management interface for pod container.
   144  	containerManager cm.ContainerManager
   145  
   146  	// Internal lifecycle event handlers for container resource management.
   147  	internalLifecycle cm.InternalContainerLifecycle
   148  
   149  	// Manage container logs.
   150  	logManager logs.ContainerLogManager
   151  
   152  	// Manage RuntimeClass resources.
   153  	runtimeClassManager *runtimeclass.Manager
   154  
   155  	// Cache last per-container error message to reduce log spam
   156  	logReduction *logreduction.LogReduction
   157  
   158  	// PodState provider instance
   159  	podStateProvider podStateProvider
   160  
   161  	// Use RuntimeDefault as the default seccomp profile for all workloads.
   162  	seccompDefault bool
   163  
   164  	// MemorySwapBehavior defines how swap is used
   165  	memorySwapBehavior string
   166  
   167  	// Function to get node allocatable resources
   168  	getNodeAllocatable func() v1.ResourceList
   169  
   170  	// Memory throttling factor for MemoryQoS
   171  	memoryThrottlingFactor float64
   172  }
   173  
   174  // KubeGenericRuntime is an interface that contains interfaces for the container runtime and command runner.
   175  type KubeGenericRuntime interface {
   176  	kubecontainer.Runtime
   177  	kubecontainer.StreamingRuntime
   178  	kubecontainer.CommandRunner
   179  }
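        
        // kubeGenericRuntimeManager (below) is the implementation returned by
        // NewKubeGenericRuntimeManager; a compile-time assertion of that
        // relationship would read:
        //
        //	var _ KubeGenericRuntime = (*kubeGenericRuntimeManager)(nil)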
   180  
   181  // NewKubeGenericRuntimeManager creates a new kubeGenericRuntimeManager
   182  func NewKubeGenericRuntimeManager(
   183  	recorder record.EventRecorder,
   184  	livenessManager proberesults.Manager,
   185  	readinessManager proberesults.Manager,
   186  	startupManager proberesults.Manager,
   187  	rootDirectory string,
   188  	machineInfo *cadvisorapi.MachineInfo,
   189  	podStateProvider podStateProvider,
   190  	osInterface kubecontainer.OSInterface,
   191  	runtimeHelper kubecontainer.RuntimeHelper,
   192  	insecureContainerLifecycleHTTPClient types.HTTPDoer,
   193  	imageBackOff *flowcontrol.Backoff,
   194  	serializeImagePulls bool,
   195  	maxParallelImagePulls *int32,
   196  	imagePullQPS float32,
   197  	imagePullBurst int,
   198  	imageCredentialProviderConfigFile string,
   199  	imageCredentialProviderBinDir string,
   200  	cpuCFSQuota bool,
   201  	cpuCFSQuotaPeriod metav1.Duration,
   202  	runtimeService internalapi.RuntimeService,
   203  	imageService internalapi.ImageManagerService,
   204  	containerManager cm.ContainerManager,
   205  	logManager logs.ContainerLogManager,
   206  	runtimeClassManager *runtimeclass.Manager,
   207  	seccompDefault bool,
   208  	memorySwapBehavior string,
   209  	getNodeAllocatable func() v1.ResourceList,
   210  	memoryThrottlingFactor float64,
   211  	podPullingTimeRecorder images.ImagePodPullingTimeRecorder,
   212  	tracerProvider trace.TracerProvider,
   213  ) (KubeGenericRuntime, error) {
   214  	ctx := context.Background()
   215  	runtimeService = newInstrumentedRuntimeService(runtimeService)
   216  	imageService = newInstrumentedImageManagerService(imageService)
   217  	tracer := tracerProvider.Tracer(instrumentationScope)
   218  	kubeRuntimeManager := &kubeGenericRuntimeManager{
   219  		recorder:               recorder,
   220  		cpuCFSQuota:            cpuCFSQuota,
   221  		cpuCFSQuotaPeriod:      cpuCFSQuotaPeriod,
   222  		seccompProfileRoot:     filepath.Join(rootDirectory, "seccomp"),
   223  		livenessManager:        livenessManager,
   224  		readinessManager:       readinessManager,
   225  		startupManager:         startupManager,
   226  		machineInfo:            machineInfo,
   227  		osInterface:            osInterface,
   228  		runtimeHelper:          runtimeHelper,
   229  		runtimeService:         runtimeService,
   230  		imageService:           imageService,
   231  		containerManager:       containerManager,
   232  		internalLifecycle:      containerManager.InternalContainerLifecycle(),
   233  		logManager:             logManager,
   234  		runtimeClassManager:    runtimeClassManager,
   235  		logReduction:           logreduction.NewLogReduction(identicalErrorDelay),
   236  		seccompDefault:         seccompDefault,
   237  		memorySwapBehavior:     memorySwapBehavior,
   238  		getNodeAllocatable:     getNodeAllocatable,
   239  		memoryThrottlingFactor: memoryThrottlingFactor,
   240  	}
   241  
   242  	typedVersion, err := kubeRuntimeManager.getTypedVersion(ctx)
   243  	if err != nil {
   244  		klog.ErrorS(err, "Get runtime version failed")
   245  		return nil, err
   246  	}
   247  
   248  	// Only matching kubeRuntimeAPIVersion is supported now
   249  	// TODO: Runtime API machinery is under discussion at https://github.com/kubernetes/kubernetes/issues/28642
   250  	if typedVersion.Version != kubeRuntimeAPIVersion {
   251  		klog.ErrorS(nil, "This runtime api version is not supported",
   252  			"apiVersion", typedVersion.Version,
   253  			"supportedAPIVersion", kubeRuntimeAPIVersion)
   254  		return nil, ErrVersionNotSupported
   255  	}
   256  
   257  	kubeRuntimeManager.runtimeName = typedVersion.RuntimeName
   258  	klog.InfoS("Container runtime initialized",
   259  		"containerRuntime", typedVersion.RuntimeName,
   260  		"version", typedVersion.RuntimeVersion,
   261  		"apiVersion", typedVersion.RuntimeApiVersion)
   262  
   263  	// If the container logs directory does not exist, create it.
   264  	// TODO: create podLogsRootDirectory at kubelet.go when kubelet is refactored to
   265  	// new runtime interface
   266  	if _, err := osInterface.Stat(podLogsRootDirectory); os.IsNotExist(err) {
   267  		if err := osInterface.MkdirAll(podLogsRootDirectory, 0755); err != nil {
   268  			klog.ErrorS(err, "Failed to create pod log directory", "path", podLogsRootDirectory)
   269  		}
   270  	}
   271  
   272  	if imageCredentialProviderConfigFile != "" || imageCredentialProviderBinDir != "" {
   273  		if err := plugin.RegisterCredentialProviderPlugins(imageCredentialProviderConfigFile, imageCredentialProviderBinDir); err != nil {
   274  			klog.ErrorS(err, "Failed to register CRI auth plugins")
   275  			os.Exit(1)
   276  		}
   277  	}
   278  	kubeRuntimeManager.keyring = credentialprovider.NewDockerKeyring()
   279  
   280  	kubeRuntimeManager.imagePuller = images.NewImageManager(
   281  		kubecontainer.FilterEventRecorder(recorder),
   282  		kubeRuntimeManager,
   283  		imageBackOff,
   284  		serializeImagePulls,
   285  		maxParallelImagePulls,
   286  		imagePullQPS,
   287  		imagePullBurst,
   288  		podPullingTimeRecorder)
   289  	kubeRuntimeManager.runner = lifecycle.NewHandlerRunner(insecureContainerLifecycleHTTPClient, kubeRuntimeManager, kubeRuntimeManager, recorder)
   290  	kubeRuntimeManager.containerGC = newContainerGC(runtimeService, podStateProvider, kubeRuntimeManager, tracer)
   291  	kubeRuntimeManager.podStateProvider = podStateProvider
   292  
   293  	kubeRuntimeManager.versionCache = cache.NewObjectCache(
   294  		func() (interface{}, error) {
   295  			return kubeRuntimeManager.getTypedVersion(ctx)
   296  		},
   297  		versionCacheTTL,
   298  	)
   299  
   300  	return kubeRuntimeManager, nil
   301  }
   302  
   303  // Type returns the type of the container runtime.
   304  func (m *kubeGenericRuntimeManager) Type() string {
   305  	return m.runtimeName
   306  }
   307  
   308  func newRuntimeVersion(version string) (*utilversion.Version, error) {
   309  	if ver, err := utilversion.ParseSemantic(version); err == nil {
   310  		return ver, err
   311  	}
   312  	return utilversion.ParseGeneric(version)
   313  }
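        
        // For example (illustrative values):
        //
        //	semver, _ := newRuntimeVersion("1.6.9") // parsed with ParseSemantic
        //	generic, _ := newRuntimeVersion("1.6")  // not full semver; falls back to ParseGeneric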
   314  
   315  func (m *kubeGenericRuntimeManager) getTypedVersion(ctx context.Context) (*runtimeapi.VersionResponse, error) {
   316  	typedVersion, err := m.runtimeService.Version(ctx, kubeRuntimeAPIVersion)
   317  	if err != nil {
   318  		return nil, fmt.Errorf("get remote runtime typed version failed: %v", err)
   319  	}
   320  	return typedVersion, nil
   321  }
   322  
   323  // Version returns the version information of the container runtime.
   324  func (m *kubeGenericRuntimeManager) Version(ctx context.Context) (kubecontainer.Version, error) {
   325  	typedVersion, err := m.getTypedVersion(ctx)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  
   330  	return newRuntimeVersion(typedVersion.RuntimeVersion)
   331  }
   332  
   333  // APIVersion returns the cached API version information of the container
   334  // runtime. The implementation is expected to update this cache periodically.
   335  // This may be different from the runtime engine's version.
   336  func (m *kubeGenericRuntimeManager) APIVersion() (kubecontainer.Version, error) {
   337  	versionObject, err := m.versionCache.Get(m.machineInfo.MachineID)
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  	typedVersion := versionObject.(*runtimeapi.VersionResponse)
   342  
   343  	return newRuntimeVersion(typedVersion.RuntimeApiVersion)
   344  }
   345  
   346  // Status returns the status of the runtime. An error is returned if the Status
   347  // function itself fails, nil otherwise.
   348  func (m *kubeGenericRuntimeManager) Status(ctx context.Context) (*kubecontainer.RuntimeStatus, error) {
   349  	resp, err := m.runtimeService.Status(ctx, false) // verbose=false; only the structured status is needed
   350  	if err != nil {
   351  		return nil, err
   352  	}
   353  	if resp.GetStatus() == nil {
   354  		return nil, errors.New("runtime status is nil")
   355  	}
   356  	return toKubeRuntimeStatus(resp.GetStatus()), nil
   357  }
   358  
   359  // GetPods returns a list of containers grouped by pods. The boolean parameter
   360  // specifies whether the runtime returns all containers, including exited and
   361  // dead ones (used for garbage collection).
   362  func (m *kubeGenericRuntimeManager) GetPods(ctx context.Context, all bool) ([]*kubecontainer.Pod, error) {
   363  	pods := make(map[kubetypes.UID]*kubecontainer.Pod)
   364  	sandboxes, err := m.getKubeletSandboxes(ctx, all)
   365  	if err != nil {
   366  		return nil, err
   367  	}
   368  	for i := range sandboxes {
   369  		s := sandboxes[i]
   370  		if s.Metadata == nil {
   371  			klog.V(4).InfoS("Sandbox does not have metadata", "sandbox", s)
   372  			continue
   373  		}
   374  		podUID := kubetypes.UID(s.Metadata.Uid)
   375  		if _, ok := pods[podUID]; !ok {
   376  			pods[podUID] = &kubecontainer.Pod{
   377  				ID:        podUID,
   378  				Name:      s.Metadata.Name,
   379  				Namespace: s.Metadata.Namespace,
   380  			}
   381  		}
   382  		p := pods[podUID]
   383  		converted, err := m.sandboxToKubeContainer(s)
   384  		if err != nil {
   385  			klog.V(4).InfoS("Convert sandbox of pod failed", "runtimeName", m.runtimeName, "sandbox", s, "podUID", podUID, "err", err)
   386  			continue
   387  		}
   388  		p.Sandboxes = append(p.Sandboxes, converted)
   389  		p.CreatedAt = uint64(s.GetCreatedAt())
   390  	}
   391  
   392  	containers, err := m.getKubeletContainers(ctx, all)
   393  	if err != nil {
   394  		return nil, err
   395  	}
   396  	for i := range containers {
   397  		c := containers[i]
   398  		if c.Metadata == nil {
   399  			klog.V(4).InfoS("Container does not have metadata", "container", c)
   400  			continue
   401  		}
   402  
   403  		labelledInfo := getContainerInfoFromLabels(c.Labels)
   404  		pod, found := pods[labelledInfo.PodUID]
   405  		if !found {
   406  			pod = &kubecontainer.Pod{
   407  				ID:        labelledInfo.PodUID,
   408  				Name:      labelledInfo.PodName,
   409  				Namespace: labelledInfo.PodNamespace,
   410  			}
   411  			pods[labelledInfo.PodUID] = pod
   412  		}
   413  
   414  		converted, err := m.toKubeContainer(c)
   415  		if err != nil {
   416  			klog.V(4).InfoS("Convert container of pod failed", "runtimeName", m.runtimeName, "container", c, "podUID", labelledInfo.PodUID, "err", err)
   417  			continue
   418  		}
   419  
   420  		pod.Containers = append(pod.Containers, converted)
   421  	}
   422  
   423  	// Convert map to list.
   424  	var result []*kubecontainer.Pod
   425  	for _, pod := range pods {
   426  		result = append(result, pod)
   427  	}
   428  
   429  	// There are scenarios where multiple pods with the same name are running in
   430  	// parallel, because one of them has not been fully terminated yet.
   431  	// To avoid unexpected behavior on container name based search (for example
   432  	// by calling *Kubelet.findContainer() without specifying a pod ID), we now
   433  	// return the list of pods ordered by their creation time.
   434  	sort.SliceStable(result, func(i, j int) bool {
   435  		return result[i].CreatedAt > result[j].CreatedAt
   436  	})
   437  	klog.V(4).InfoS("Retrieved pods from runtime", "all", all)
   438  	return result, nil
   439  }
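        
        // Given the newest-first ordering, a caller can treat the head of the list
        // as the most recently created pod, e.g. (hypothetical):
        //
        //	pods, err := m.GetPods(ctx, true)
        //	if err == nil && len(pods) > 0 {
        //		newest := pods[0] // largest CreatedAt
        //		_ = newest
        //	}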
   440  
   441  // containerKillReason explains what killed a given container
   442  type containerKillReason string
   443  
   444  const (
   445  	reasonStartupProbe        containerKillReason = "StartupProbe"
   446  	reasonLivenessProbe       containerKillReason = "LivenessProbe"
   447  	reasonFailedPostStartHook containerKillReason = "FailedPostStartHook"
   448  	reasonUnknown             containerKillReason = "Unknown"
   449  )
   450  
   451  // containerToKillInfo contains necessary information to kill a container.
   452  type containerToKillInfo struct {
   453  	// The spec of the container.
   454  	container *v1.Container
   455  	// The name of the container.
   456  	name string
   457  	// The message indicates why the container will be killed.
   458  	message string
   459  	// The reason is a clearer source of info on why a container will be killed
   460  	// TODO: replace message with reason?
   461  	reason containerKillReason
   462  }
   463  
   464  // containerResources holds the set of resources applicable to the running container
   465  type containerResources struct {
   466  	memoryLimit   int64
   467  	memoryRequest int64
   468  	cpuLimit      int64
   469  	cpuRequest    int64
   470  }
   471  
   472  // containerToUpdateInfo contains necessary information to update a container's resources.
   473  type containerToUpdateInfo struct {
   474  	// Index of the container in pod.Spec.Containers that needs resource update
   475  	apiContainerIdx int
   476  	// ID of the runtime container that needs resource update
   477  	kubeContainerID kubecontainer.ContainerID
   478  	// Desired resources for the running container
   479  	desiredContainerResources containerResources
   480  	// Most recently configured resources on the running container
   481  	currentContainerResources *containerResources
   482  }
   483  
   484  // podActions keeps information about what to do for a pod.
   485  type podActions struct {
   486  	// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod.
   487  	KillPod bool
   488  	// Whether a new sandbox needs to be created. If the pod needs to be killed and
   489  	// a new sandbox created, all init containers need to be purged (i.e., removed).
   490  	CreateSandbox bool
   491  	// The ID of the existing sandbox. It is used for starting containers in ContainersToStart.
   492  	SandboxID string
   493  	// The attempt number of creating sandboxes for the pod.
   494  	Attempt uint32
   495  
   496  	// The next init container to start.
   497  	NextInitContainerToStart *v1.Container
   498  	// InitContainersToStart keeps a list of indexes for the init containers to
   499  	// start, where the index is the index of the specific init container in the
   500  	// pod spec (pod.Spec.InitContainers).
   501  	// NOTE: This is a field for SidecarContainers feature. Either this or
   502  	// NextInitContainerToStart will be set.
   503  	InitContainersToStart []int
   504  	// ContainersToStart keeps a list of indexes for the containers to start,
   505  	// where the index is the index of the specific container in the pod spec (
   506  	// pod.Spec.Containers).
   507  	ContainersToStart []int
   508  	// ContainersToKill keeps a map of containers that need to be killed, note that
   509  	// the key is the container ID of the container, while
   510  	// the value contains necessary information to kill a container.
   511  	ContainersToKill map[kubecontainer.ContainerID]containerToKillInfo
   512  	// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start,
   513  	// where the index is the index of the specific container in pod.Spec.EphemeralContainers.
   514  	EphemeralContainersToStart []int
   515  	// ContainersToUpdate keeps a list of containers needing resource update.
   516  	// Container resource update is applicable only for CPU and memory.
   517  	ContainersToUpdate map[v1.ResourceName][]containerToUpdateInfo
   518  	// UpdatePodResources is true if container(s) need resource update with restart
   519  	UpdatePodResources bool
   520  }
   521  
   522  func (p podActions) String() string {
   523  	return fmt.Sprintf("KillPod: %t, CreateSandbox: %t, UpdatePodResources: %t, Attempt: %d, InitContainersToStart: %v, ContainersToStart: %v, EphemeralContainersToStart: %v, ContainersToUpdate: %v, ContainersToKill: %v",
   524  		p.KillPod, p.CreateSandbox, p.UpdatePodResources, p.Attempt, p.InitContainersToStart, p.ContainersToStart, p.EphemeralContainersToStart, p.ContainersToUpdate, p.ContainersToKill)
   525  }
   526  
   527  func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) {
   528  	expectedHash := kubecontainer.HashContainer(container)
   529  	return expectedHash, containerStatus.Hash, containerStatus.Hash != expectedHash
   530  }
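        
        // The hash comparison above is the spec-change detector: any edit to the
        // container spec produces a new hash and therefore a restart, except that
        // computePodActions consults HashContainerWithoutResources so a pure
        // Resources change does not force a restart when in-place resize is allowed.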
   531  
   532  func shouldRestartOnFailure(pod *v1.Pod) bool {
   533  	return pod.Spec.RestartPolicy != v1.RestartPolicyNever
   534  }
   535  
   536  func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) bool {
   537  	cStatus := podStatus.FindContainerStatusByName(c.Name)
   538  	if cStatus == nil || cStatus.State == kubecontainer.ContainerStateRunning {
   539  		return false
   540  	}
   541  	return cStatus.ExitCode == 0
   542  }
   543  
   544  func isInPlacePodVerticalScalingAllowed(pod *v1.Pod) bool {
   545  	if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
   546  		return false
   547  	}
   548  	if types.IsStaticPod(pod) {
   549  		return false
   550  	}
   551  	return true
   552  }
   553  
   554  func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
   555  	container := pod.Spec.Containers[containerIdx]
   556  	if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
   557  		return true
   558  	}
   559  
   560  	// Determine if the *running* container needs resource update by comparing v1.Spec.Resources (desired)
   561  	// with v1.Status.Resources / runtime.Status.Resources (last known actual).
   562  	// Proceed only when the kubelet has accepted the resize, i.e. v1.Spec.Resources.Requests == v1.Status.AllocatedResources.
   563  	// Skip if the runtime containerID doesn't match the pod.Status containerID (the container is restarting).
   564  	apiContainerStatus, exists := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
   565  	if !exists || apiContainerStatus.State.Running == nil || apiContainerStatus.Resources == nil ||
   566  		kubeContainerStatus.State != kubecontainer.ContainerStateRunning ||
   567  		kubeContainerStatus.ID.String() != apiContainerStatus.ContainerID ||
   568  		!cmp.Equal(container.Resources.Requests, apiContainerStatus.AllocatedResources) {
   569  		return true
   570  	}
   571  
   572  	desiredMemoryLimit := container.Resources.Limits.Memory().Value()
   573  	desiredCPULimit := container.Resources.Limits.Cpu().MilliValue()
   574  	desiredCPURequest := container.Resources.Requests.Cpu().MilliValue()
   575  	currentMemoryLimit := apiContainerStatus.Resources.Limits.Memory().Value()
   576  	currentCPULimit := apiContainerStatus.Resources.Limits.Cpu().MilliValue()
   577  	currentCPURequest := apiContainerStatus.Resources.Requests.Cpu().MilliValue()
   578  	// Runtime container status resources (from CRI), if set, supersede v1 (API) container status resources.
   579  	if kubeContainerStatus.Resources != nil {
   580  		if kubeContainerStatus.Resources.MemoryLimit != nil {
   581  			currentMemoryLimit = kubeContainerStatus.Resources.MemoryLimit.Value()
   582  		}
   583  		if kubeContainerStatus.Resources.CPULimit != nil {
   584  			currentCPULimit = kubeContainerStatus.Resources.CPULimit.MilliValue()
   585  		}
   586  		if kubeContainerStatus.Resources.CPURequest != nil {
   587  			currentCPURequest = kubeContainerStatus.Resources.CPURequest.MilliValue()
   588  		}
   589  	}
   590  
   591  	// Note: cgroups don't support memory requests today, so we don't compare them. If canAdmitPod, called during
   592  	// handlePodResourcesResize, finds 'fit', then desiredMemoryRequest == currentMemoryRequest.
   593  	if desiredMemoryLimit == currentMemoryLimit && desiredCPULimit == currentCPULimit && desiredCPURequest == currentCPURequest {
   594  		return true
   595  	}
   596  
   597  	desiredResources := containerResources{
   598  		memoryLimit:   desiredMemoryLimit,
   599  		memoryRequest: apiContainerStatus.AllocatedResources.Memory().Value(),
   600  		cpuLimit:      desiredCPULimit,
   601  		cpuRequest:    desiredCPURequest,
   602  	}
   603  	currentResources := containerResources{
   604  		memoryLimit:   currentMemoryLimit,
   605  		memoryRequest: apiContainerStatus.Resources.Requests.Memory().Value(),
   606  		cpuLimit:      currentCPULimit,
   607  		cpuRequest:    currentCPURequest,
   608  	}
   609  
   610  	resizePolicy := make(map[v1.ResourceName]v1.ResourceResizeRestartPolicy)
   611  	for _, pol := range container.ResizePolicy {
   612  		resizePolicy[pol.ResourceName] = pol.RestartPolicy
   613  	}
   614  	determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) {
   615  		if specValue == statusValue {
   616  			return false, false
   617  		}
   618  		if resizePolicy[rName] == v1.RestartContainer {
   619  			return true, true
   620  		}
   621  		return true, false
   622  	}
   623  	markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) {
   624  		cUpdateInfo := containerToUpdateInfo{
   625  			apiContainerIdx:           containerIdx,
   626  			kubeContainerID:           kubeContainerStatus.ID,
   627  			desiredContainerResources: desiredResources,
   628  			currentContainerResources: &currentResources,
   629  		}
   630  		// Order the container updates such that resource decreases are applied before increases
   631  		switch {
   632  		case specValue > statusValue: // append
   633  			changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo)
   634  		case specValue < statusValue: // prepend
   635  			changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{})
   636  			copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName])
   637  			changes.ContainersToUpdate[rName][0] = cUpdateInfo
   638  		}
   639  	}
   640  	resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
   641  	resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
   642  	resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
   643  	if restartCPULim || restartCPUReq || restartMemLim {
   644  		// resize policy requires this container to restart
   645  		changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{
   646  			name:      kubeContainerStatus.Name,
   647  			container: &pod.Spec.Containers[containerIdx],
   648  			message:   fmt.Sprintf("Container %s resize requires restart", container.Name),
   649  		}
   650  		changes.ContainersToStart = append(changes.ContainersToStart, containerIdx)
   651  		changes.UpdatePodResources = true
   652  		return false
   653  	} else {
   654  		if resizeMemLim {
   655  			markContainerForUpdate(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
   656  		}
   657  		if resizeCPULim {
   658  			markContainerForUpdate(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
   659  		} else if resizeCPUReq {
   660  			markContainerForUpdate(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
   661  		}
   662  	}
   663  	return true
   664  }
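        
        // To illustrate the policy handling above: for a container whose resize
        // policy is, hypothetically,
        //
        //	[]v1.ContainerResizePolicy{
        //		{ResourceName: v1.ResourceMemory, RestartPolicy: v1.RestartContainer},
        //		{ResourceName: v1.ResourceCPU, RestartPolicy: v1.NotRequired},
        //	}
        //
        // a memory-limit change lands in ContainersToKill/ContainersToStart (restart
        // required), while a CPU-only change is queued in ContainersToUpdate.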
   665  
   666  func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
   667  	pcm := m.containerManager.NewPodContainerManager()
   668  	// TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
   669  	podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
   670  	if podResources == nil {
   671  		klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name)
   672  		result.Fail(fmt.Errorf("unable to get resource configuration while processing resize for pod %s", pod.Name))
   673  		return
   674  	}
   675  	setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
   676  		var err error
   677  		switch rName {
   678  		case v1.ResourceCPU:
   679  			podCpuResources := &cm.ResourceConfig{CPUPeriod: podResources.CPUPeriod}
   680  			if setLimitValue {
   681  				podCpuResources.CPUQuota = podResources.CPUQuota
   682  			} else {
   683  				podCpuResources.CPUShares = podResources.CPUShares
   684  			}
   685  			err = pcm.SetPodCgroupConfig(pod, rName, podCpuResources)
   686  		case v1.ResourceMemory:
   687  			err = pcm.SetPodCgroupConfig(pod, rName, podResources)
   688  		}
   689  		if err != nil {
   690  			klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
   691  		}
   692  		return err
   693  	}
   694  	// Memory and CPU are updated separately because memory resizes may be ordered differently than CPU resizes.
   695  	// If resize results in net pod resource increase, set pod cgroup config before resizing containers.
   696  	// If resize results in net pod resource decrease, set pod cgroup config after resizing containers.
   697  	// If an error occurs at any point, abort. Let future syncpod iterations retry the unfinished stuff.
   698  	resizeContainers := func(rName v1.ResourceName, currPodCgLimValue, newPodCgLimValue, currPodCgReqValue, newPodCgReqValue int64) error {
   699  		var err error
   700  		if newPodCgLimValue > currPodCgLimValue {
   701  			if err = setPodCgroupConfig(rName, true); err != nil {
   702  				return err
   703  			}
   704  		}
   705  		if newPodCgReqValue > currPodCgReqValue {
   706  			if err = setPodCgroupConfig(rName, false); err != nil {
   707  				return err
   708  			}
   709  		}
   710  		if len(podContainerChanges.ContainersToUpdate[rName]) > 0 {
   711  			if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil {
   712  				klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName)
   713  				return err
   714  			}
   715  		}
   716  		if newPodCgLimValue < currPodCgLimValue {
        		// Propagate the error immediately so the request-path call below
        		// cannot overwrite it with a nil result.
   717  			if err = setPodCgroupConfig(rName, true); err != nil {
        				return err
        			}
   718  		}
   719  		if newPodCgReqValue < currPodCgReqValue {
   720  			if err = setPodCgroupConfig(rName, false); err != nil {
   721  				return err
   722  			}
   723  		}
   724  		return err
   725  	}
   726  	if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
   727  		if podResources.Memory == nil {
   728  			klog.ErrorS(nil, "podResources.Memory is nil", "pod", pod.Name)
   729  			result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", pod.Name))
   730  			return
   731  		}
   732  		currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
   733  		if err != nil {
   734  			klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
   735  			result.Fail(err)
   736  			return
   737  		}
   738  		currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
   739  		if err != nil {
   740  			klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
   741  			result.Fail(err)
   742  			return
   743  		}
   744  		if currentPodMemoryUsage >= uint64(*podResources.Memory) {
   745  			klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
   746  			result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
   747  			return
   748  		}
   749  		if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
   750  			result.Fail(errResize)
   751  			return
   752  		}
   753  	}
   754  	if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
   755  		if podResources.CPUQuota == nil || podResources.CPUShares == nil {
   756  			klog.ErrorS(nil, "podResources.CPUQuota or podResources.CPUShares is nil", "pod", pod.Name)
   757  			result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", pod.Name))
   758  			return
   759  		}
   760  		currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
   761  		if err != nil {
   762  			klog.ErrorS(err, "GetPodCgroupConfig for CPU failed", "pod", pod.Name)
   763  			result.Fail(err)
   764  			return
   765  		}
   766  		if errResize := resizeContainers(v1.ResourceCPU, *currentPodCpuConfig.CPUQuota, *podResources.CPUQuota,
   767  			int64(*currentPodCpuConfig.CPUShares), int64(*podResources.CPUShares)); errResize != nil {
   768  			result.Fail(errResize)
   769  			return
   770  		}
   771  	}
   772  }
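        
        // A concrete instance of the ordering rules above: growing the pod memory
        // limit from 256Mi to 512Mi raises the pod-level cgroup first and then
        // resizes containers, while shrinking from 512Mi to 256Mi resizes containers
        // first and lowers the pod-level cgroup afterwards, so the pod limit never
        // undercuts the limits of its running containers.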
   773  
   774  func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
   775  	klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))
   776  
   777  	for _, cInfo := range containersToUpdate {
   778  		container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy()
   779  		// If updating memory limit, use most recently configured CPU request and limit values.
   780  		// If updating CPU request and limit, use most recently configured memory request and limit values.
   781  		switch resourceName {
   782  		case v1.ResourceMemory:
   783  			container.Resources.Limits = v1.ResourceList{
   784  				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI),
   785  				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI),
   786  			}
   787  			container.Resources.Requests = v1.ResourceList{
   788  				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuRequest, resource.DecimalSI),
   789  				v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI),
   790  			}
   791  		case v1.ResourceCPU:
   792  			container.Resources.Limits = v1.ResourceList{
   793  				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI),
   794  				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI),
   795  			}
   796  			container.Resources.Requests = v1.ResourceList{
   797  				v1.ResourceCPU:    *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuRequest, resource.DecimalSI),
   798  				v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI),
   799  			}
   800  		}
   801  		if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil {
   802  			// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction.
   803  			// The recovery path is for SyncPod to keep retrying at later times until it succeeds.
   804  			klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID,
   805  				"pod", format.Pod(pod), "resourceName", resourceName)
   806  			return err
   807  		}
   808  		// If UpdateContainerResources is error-free, it means desired values for 'resourceName' was accepted by runtime.
   809  		// So we update currentContainerResources for 'resourceName', which is our view of most recently configured resources.
   810  		// Note: We can't rely on GetPodStatus as runtime may lag in actuating the resource values it just accepted.
   811  		switch resourceName {
   812  		case v1.ResourceMemory:
   813  			cInfo.currentContainerResources.memoryLimit = cInfo.desiredContainerResources.memoryLimit
   814  			cInfo.currentContainerResources.memoryRequest = cInfo.desiredContainerResources.memoryRequest
   815  		case v1.ResourceCPU:
   816  			cInfo.currentContainerResources.cpuLimit = cInfo.desiredContainerResources.cpuLimit
   817  			cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest
   818  		}
   819  	}
   820  	return nil
   821  }
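        
        // Note on the quantity conversions above: CPU values round-trip through
        // resource.NewMilliQuantity (e.g. 500 becomes "500m"), while memory uses
        // resource.NewQuantity with BinarySI (e.g. 536870912 becomes "512Mi").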
   822  
   823  // computePodActions checks whether the pod spec has changed and returns the changes if true.
   824  func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
   825  	klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
   826  
   827  	createPodSandbox, attempt, sandboxID := runtimeutil.PodSandboxChanged(pod, podStatus)
   828  	changes := podActions{
   829  		KillPod:           createPodSandbox,
   830  		CreateSandbox:     createPodSandbox,
   831  		SandboxID:         sandboxID,
   832  		Attempt:           attempt,
   833  		ContainersToStart: []int{},
   834  		ContainersToKill:  make(map[kubecontainer.ContainerID]containerToKillInfo),
   835  	}
   836  
   837  	// If we need to (re-)create the pod sandbox, everything will need to be
   838  	// killed and recreated, and init containers should be purged.
   839  	if createPodSandbox {
   840  		if !shouldRestartOnFailure(pod) && attempt != 0 && len(podStatus.ContainerStatuses) != 0 {
   841  			// Should not restart the pod, just return.
   842  			// We should not create a sandbox, and should just kill the pod if it is already done:
   843  			// if all containers are done and should not be started, there is no need for a new sandbox.
   844  			// This stops the confusing logs that appeared when we recreated a sandbox for a pod whose containers all had exit codes, only to terminate it.
   845  			//
   846  			// If ContainerStatuses is empty, we assume that we've never
   847  			// successfully created any containers. In this case, we should
   848  			// retry creating the sandbox.
   849  			changes.CreateSandbox = false
   850  			return changes
   851  		}
   852  
   853  		// Get the containers to start, excluding the ones that succeeded if RestartPolicy is OnFailure.
   854  		var containersToStart []int
   855  		for idx, c := range pod.Spec.Containers {
   856  			if pod.Spec.RestartPolicy == v1.RestartPolicyOnFailure && containerSucceeded(&c, podStatus) {
   857  				continue
   858  			}
   859  			containersToStart = append(containersToStart, idx)
   860  		}
   861  
   862  		// We should not create a sandbox, and should just kill the pod if initialization
   863  		// is done and there is no container to start.
   864  		if len(containersToStart) == 0 {
   865  			hasInitialized := false
   866  			if !utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
   867  				_, _, hasInitialized = findNextInitContainerToRun(pod, podStatus)
   868  			} else {
   869  				// If there is any regular container, it means all init containers have
   870  				// been initialized.
   871  				hasInitialized = hasAnyRegularContainerCreated(pod, podStatus)
   872  			}
   873  
   874  			if hasInitialized {
   875  				changes.CreateSandbox = false
   876  				return changes
   877  			}
   878  		}
   879  
   880  		// If we are creating a pod sandbox, we should restart from the initial
   881  		// state.
   882  		if len(pod.Spec.InitContainers) != 0 {
   883  			// Pod has init containers, return the first one.
   884  			if !utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
   885  				changes.NextInitContainerToStart = &pod.Spec.InitContainers[0]
   886  			} else {
   887  				changes.InitContainersToStart = []int{0}
   888  			}
   889  
   890  			return changes
   891  		}
   892  		changes.ContainersToStart = containersToStart
   893  		return changes
   894  	}
   895  
   896  	// Ephemeral containers may be started even if initialization is not yet complete.
   897  	for i := range pod.Spec.EphemeralContainers {
   898  		c := (*v1.Container)(&pod.Spec.EphemeralContainers[i].EphemeralContainerCommon)
   899  
   900  		// Ephemeral Containers are never restarted
   901  		if podStatus.FindContainerStatusByName(c.Name) == nil {
   902  			changes.EphemeralContainersToStart = append(changes.EphemeralContainersToStart, i)
   903  		}
   904  	}
   905  
   906  	// Check initialization progress.
   907  	if !utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
   908  		initLastStatus, next, done := findNextInitContainerToRun(pod, podStatus)
   909  		if !done {
   910  			if next != nil {
   911  				initFailed := initLastStatus != nil && isInitContainerFailed(initLastStatus)
   912  				if initFailed && !shouldRestartOnFailure(pod) {
   913  					changes.KillPod = true
   914  				} else {
   915  					// Always try to stop containers in unknown state first.
   916  					if initLastStatus != nil && initLastStatus.State == kubecontainer.ContainerStateUnknown {
   917  						changes.ContainersToKill[initLastStatus.ID] = containerToKillInfo{
   918  							name:      next.Name,
   919  							container: next,
   920  							message: fmt.Sprintf("Init container is in %q state, try killing it before restart",
   921  								initLastStatus.State),
   922  							reason: reasonUnknown,
   923  						}
   924  					}
   925  					changes.NextInitContainerToStart = next
   926  				}
   927  			}
   928  			// Initialization failed or still in progress. Skip inspecting non-init
   929  			// containers.
   930  			return changes
   931  		}
   932  	} else {
   933  		hasInitialized := m.computeInitContainerActions(pod, podStatus, &changes)
   934  		if changes.KillPod || !hasInitialized {
   935  			// Initialization failed or still in progress. Skip inspecting non-init
   936  			// containers.
   937  			return changes
   938  		}
   939  	}
   940  
   941  	if isInPlacePodVerticalScalingAllowed(pod) {
   942  		changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
   943  		latestPodStatus, err := m.GetPodStatus(ctx, podStatus.ID, pod.Name, pod.Namespace)
   944  		if err == nil {
   945  			podStatus = latestPodStatus
   946  		}
   947  	}
   948  
   949  	// Number of running containers to keep.
   950  	keepCount := 0
   951  	// check the status of containers.
   952  	for idx, container := range pod.Spec.Containers {
   953  		containerStatus := podStatus.FindContainerStatusByName(container.Name)
   954  
   955  		// Call internal container post-stop lifecycle hook for any non-running container so that any
   956  		// allocated CPUs are released immediately. If the container is restarted, CPUs will be re-allocated
   957  		// to it.
   958  		if containerStatus != nil && containerStatus.State != kubecontainer.ContainerStateRunning {
   959  			if err := m.internalLifecycle.PostStopContainer(containerStatus.ID.ID); err != nil {
   960  				klog.ErrorS(err, "Internal container post-stop lifecycle hook failed for container in pod with error",
   961  					"containerName", container.Name, "pod", klog.KObj(pod))
   962  			}
   963  		}
   964  
   965  		// If container does not exist, or is not running, check whether we
   966  		// need to restart it.
   967  		if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
   968  			if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
   969  				klog.V(3).InfoS("Container of pod is not in the desired state and shall be started", "containerName", container.Name, "pod", klog.KObj(pod))
   970  				changes.ContainersToStart = append(changes.ContainersToStart, idx)
   971  				if containerStatus != nil && containerStatus.State == kubecontainer.ContainerStateUnknown {
   972  					// If container is in unknown state, we don't know whether it
   973  					// is actually running or not, always try killing it before
   974  					// restart to avoid having 2 running instances of the same container.
   975  					changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
   976  						name:      containerStatus.Name,
   977  						container: &pod.Spec.Containers[idx],
   978  						message: fmt.Sprintf("Container is in %q state, try killing it before restart",
   979  							containerStatus.State),
   980  						reason: reasonUnknown,
   981  					}
   982  				}
   983  			}
   984  			continue
   985  		}
   986  		// The container is running, but kill the container if any of the following condition is met.
   987  		var message string
   988  		var reason containerKillReason
   989  		restart := shouldRestartOnFailure(pod)
   990  		// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled
   991  		if _, _, changed := containerChanged(&container, containerStatus); changed &&
   992  			(!isInPlacePodVerticalScalingAllowed(pod) ||
   993  				kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
   994  			message = fmt.Sprintf("Container %s definition changed", container.Name)
   995  			// Restart regardless of the restart policy because the container
   996  			// spec changed.
   997  			restart = true
   998  		} else if liveness, found := m.livenessManager.Get(containerStatus.ID); found && liveness == proberesults.Failure {
   999  			// If the container failed the liveness probe, we should kill it.
  1000  			message = fmt.Sprintf("Container %s failed liveness probe", container.Name)
  1001  			reason = reasonLivenessProbe
  1002  		} else if startup, found := m.startupManager.Get(containerStatus.ID); found && startup == proberesults.Failure {
  1003  			// If the container failed the startup probe, we should kill it.
  1004  			message = fmt.Sprintf("Container %s failed startup probe", container.Name)
  1005  			reason = reasonStartupProbe
  1006  		} else if isInPlacePodVerticalScalingAllowed(pod) && !m.computePodResizeAction(pod, idx, containerStatus, &changes) {
  1007  			// computePodResizeAction updates 'changes' if resize policy requires restarting this container
  1008  			continue
  1009  		} else {
  1010  			// Keep the container.
  1011  			keepCount++
  1012  			continue
  1013  		}
  1014  
  1015  		// We need to kill the container, but if we also want to restart the
  1016  		// container afterwards, make the intent clear in the message. Also do
  1017  		// not kill the entire pod since we expect the container to be running eventually.
  1018  		if restart {
  1019  			message = fmt.Sprintf("%s, will be restarted", message)
  1020  			changes.ContainersToStart = append(changes.ContainersToStart, idx)
  1021  		}
  1022  
  1023  		changes.ContainersToKill[containerStatus.ID] = containerToKillInfo{
  1024  			name:      containerStatus.Name,
  1025  			container: &pod.Spec.Containers[idx],
  1026  			message:   message,
  1027  			reason:    reason,
  1028  		}
  1029  		klog.V(2).InfoS("Message for Container of pod", "containerName", container.Name, "containerStatusID", containerStatus.ID, "pod", klog.KObj(pod), "containerMessage", message)
  1030  	}
  1031  
  1032  	if keepCount == 0 && len(changes.ContainersToStart) == 0 {
  1033  		changes.KillPod = true
  1034  		if utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
  1035  			// To prevent the restartable init containers from keeping the pod alive,
  1036  			// we should not restart them.
  1037  			changes.InitContainersToStart = nil
  1038  		}
  1039  	}
  1040  
  1041  	return changes
  1042  }
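        
        // Worked example: for a running pod whose only app container has failed its
        // liveness probe under restartPolicy Always, the loop above records the
        // container in ContainersToKill with reason LivenessProbe and a "will be
        // restarted" message, appends its index to ContainersToStart, and leaves
        // KillPod false.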
  1043  
  1044  // SyncPod syncs the running pod into the desired pod by executing following steps:
  1045  //
  1046  //  1. Compute sandbox and container changes.
  1047  //  2. Kill pod sandbox if necessary.
  1048  //  3. Kill any containers that should not be running.
  1049  //  4. Create sandbox if necessary.
  1050  //  5. Create ephemeral containers.
  1051  //  6. Create init containers.
  1052  //  7. Resize running containers (if InPlacePodVerticalScaling==true)
  1053  //  8. Create normal containers.
  1054  func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
  1055  	// Step 1: Compute sandbox and container changes.
  1056  	podContainerChanges := m.computePodActions(ctx, pod, podStatus)
  1057  	klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
  1058  	if podContainerChanges.CreateSandbox {
  1059  		ref, err := ref.GetReference(legacyscheme.Scheme, pod)
  1060  		if err != nil {
  1061  			klog.ErrorS(err, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
  1062  		}
  1063  		if podContainerChanges.SandboxID != "" {
  1064  			m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
  1065  		} else {
  1066  			klog.V(4).InfoS("SyncPod received new pod, will create a sandbox for it", "pod", klog.KObj(pod))
  1067  		}
  1068  	}
  1069  
  1070  	// Step 2: Kill the pod if the sandbox has changed.
  1071  	if podContainerChanges.KillPod {
  1072  		if podContainerChanges.CreateSandbox {
  1073  			klog.V(4).InfoS("Stopping PodSandbox for pod, will start new one", "pod", klog.KObj(pod))
  1074  		} else {
  1075  			klog.V(4).InfoS("Stopping PodSandbox for pod, because all other containers are dead", "pod", klog.KObj(pod))
  1076  		}
  1077  
  1078  		killResult := m.killPodWithSyncResult(ctx, pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
  1079  		result.AddPodSyncResult(killResult)
  1080  		if killResult.Error() != nil {
  1081  			klog.ErrorS(killResult.Error(), "killPodWithSyncResult failed")
  1082  			return
  1083  		}
  1084  
  1085  		if podContainerChanges.CreateSandbox {
  1086  			m.purgeInitContainers(ctx, pod, podStatus)
  1087  		}
  1088  	} else {
  1089  		// Step 3: kill any running containers in this pod which are not to keep.
  1090  		for containerID, containerInfo := range podContainerChanges.ContainersToKill {
  1091  			klog.V(3).InfoS("Killing unwanted container for pod", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
  1092  			killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
  1093  			result.AddSyncResult(killContainerResult)
  1094  			if err := m.killContainer(ctx, pod, containerID, containerInfo.name, containerInfo.message, containerInfo.reason, nil, nil); err != nil {
  1095  				killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
  1096  				klog.ErrorS(err, "killContainer for pod failed", "containerName", containerInfo.name, "containerID", containerID, "pod", klog.KObj(pod))
  1097  				return
  1098  			}
  1099  		}
  1100  	}
  1101  
  1102  	// Keep terminated init containers fairly aggressively pruned.
  1103  	// This is an optimization because container removals are typically handled
  1104  	// by the container garbage collector.
  1105  	m.pruneInitContainersBeforeStart(ctx, pod, podStatus)
  1106  
  1107  	// We pass the value of the PRIMARY podIP and list of podIPs down to
  1108  	// generatePodSandboxConfig and generateContainerConfig, which in turn
  1109  	// passes it to various other functions, in order to facilitate functionality
  1110  	// that requires this value (hosts file and downward API) and avoid races determining
  1111  	// the pod IP in cases where a container requires restart but the
  1112  	// podIP isn't in the status manager yet. The list of podIPs is used to
  1113  	// generate the hosts file.
  1114  	//
  1115  	// We default to the IPs in the passed-in pod status, and overwrite them if the
  1116  	// sandbox needs to be (re)started.
  1117  	var podIPs []string
  1118  	if podStatus != nil {
  1119  		podIPs = podStatus.IPs
  1120  	}
  1121  
  1122  	// Step 4: Create a sandbox for the pod if necessary.
  1123  	podSandboxID := podContainerChanges.SandboxID
  1124  	if podContainerChanges.CreateSandbox {
  1125  		var msg string
  1126  		var err error
  1127  
  1128  		klog.V(4).InfoS("Creating PodSandbox for pod", "pod", klog.KObj(pod))
  1129  		metrics.StartedPodsTotal.Inc()
  1130  		createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
  1131  		result.AddSyncResult(createSandboxResult)
  1132  
  1133  		// ConvertPodSysctlsVariableToDotsSeparator converts sysctl variable
  1134  		// in the Pod.Spec.SecurityContext.Sysctls slice into a dot as a separator.
  1135  		// runc uses the dot as the separator to verify whether the sysctl variable
  1136  		// is correct in a separate namespace, so when using the slash as the sysctl
  1137  		// variable separator, runc returns an error: "sysctl is not in a separate kernel namespace"
  1138  		// and the podSandBox cannot be successfully created. Therefore, before calling runc,
  1139  		// we need to convert the sysctl variable so that the dot is used as the kernel namespace separator.
  1140  		// When runc supports slash as sysctl separator, this function can no longer be used.
  1141  		sysctl.ConvertPodSysctlsVariableToDotsSeparator(pod.Spec.SecurityContext)
  1142  
  1143  		// Prepare resources allocated by the Dynamic Resource Allocation feature for the pod
  1144  		if utilfeature.DefaultFeatureGate.Enabled(features.DynamicResourceAllocation) {
  1145  			if err := m.runtimeHelper.PrepareDynamicResources(pod); err != nil {
  1146  				ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
  1147  				if referr != nil {
  1148  					klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
  1149  					return
  1150  				}
  1151  				m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedPrepareDynamicResources, "Failed to prepare dynamic resources: %v", err)
  1152  				klog.ErrorS(err, "Failed to prepare dynamic resources", "pod", klog.KObj(pod))
  1153  				return
  1154  			}
  1155  		}
  1156  
  1157  		podSandboxID, msg, err = m.createPodSandbox(ctx, pod, podContainerChanges.Attempt)
  1158  		if err != nil {
  1159  			// createPodSandbox can return an error from CNI, CSI,
  1160  			// or CRI if the pod has been deleted while the sandbox
  1161  			// is being created. If the pod has been deleted then it
  1162  			// is not a real error.
  1163  			//
  1164  			// SyncPod can still be running when we get here, which
  1165  			// means the PodWorker has not acked the deletion.
  1166  			if m.podStateProvider.IsPodTerminationRequested(pod.UID) {
  1167  				klog.V(4).InfoS("Pod was deleted and sandbox failed to be created", "pod", klog.KObj(pod), "podUID", pod.UID)
  1168  				return
  1169  			}
  1170  			metrics.StartedPodsErrorsTotal.Inc()
  1171  			createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
  1172  			klog.ErrorS(err, "CreatePodSandbox for pod failed", "pod", klog.KObj(pod))
  1173  			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
  1174  			if referr != nil {
  1175  				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
  1176  			}
  1177  			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed to create pod sandbox: %v", err)
  1178  			return
  1179  		}
  1180  		klog.V(4).InfoS("Created PodSandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
  1181  
  1182  		resp, err := m.runtimeService.PodSandboxStatus(ctx, podSandboxID, false)
  1183  		if err != nil {
  1184  			ref, referr := ref.GetReference(legacyscheme.Scheme, pod)
  1185  			if referr != nil {
  1186  				klog.ErrorS(referr, "Couldn't make a ref to pod", "pod", klog.KObj(pod))
  1187  			}
  1188  			m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
  1189  			klog.ErrorS(err, "Failed to get pod sandbox status; Skipping pod", "pod", klog.KObj(pod))
  1190  			result.Fail(err)
  1191  			return
  1192  		}
  1193  		if resp.GetStatus() == nil {
  1194  			result.Fail(errors.New("pod sandbox status is nil"))
  1195  			return
  1196  		}
  1197  
  1198  		// If we ever allow updating a pod from non-host-network to
  1199  		// host-network, we may use a stale IP.
  1200  		if !kubecontainer.IsHostNetworkPod(pod) {
  1201  			// Overwrite the podIPs passed in the pod status, since we just started the pod sandbox.
  1202  			podIPs = m.determinePodSandboxIPs(pod.Namespace, pod.Name, resp.GetStatus())
  1203  			klog.V(4).InfoS("Determined the ip for pod after sandbox changed", "IPs", podIPs, "pod", klog.KObj(pod))
  1204  		}
  1205  	}
  1206  
  1207  	// The start-container routines depend on the pod IP (i.e. the primary
  1208  	// pod IP). Instead of checking whether 0 < len(podIPs) every time, we
  1209  	// short-circuit it here.
  1210  	podIP := ""
  1211  	if len(podIPs) != 0 {
  1212  		podIP = podIPs[0]
  1213  	}
  1214  
  1215  	// Get podSandboxConfig for containers to start.
  1216  	configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
  1217  	result.AddSyncResult(configPodSandboxResult)
  1218  	podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
  1219  	if err != nil {
  1220  		message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
  1221  		klog.ErrorS(err, "GeneratePodSandboxConfig for pod failed", "pod", klog.KObj(pod))
  1222  		configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
  1223  		return
  1224  	}
  1225  
  1226  	// Helper containing boilerplate common to starting all types of containers.
  1227  	// typeName describes this type of container in log messages,
  1228  	// currently: "container", "init container" or "ephemeral container".
  1229  	// metricLabel describes this type of container in monitoring metrics,
  1230  	// currently: "container", "init_container" or "ephemeral_container".
  1231  	start := func(ctx context.Context, typeName, metricLabel string, spec *startSpec) error {
  1232  		startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, spec.container.Name)
  1233  		result.AddSyncResult(startContainerResult)
  1234  
  1235  		isInBackOff, msg, err := m.doBackOff(pod, spec.container, podStatus, backOff)
  1236  		if isInBackOff {
  1237  			startContainerResult.Fail(err, msg)
  1238  			klog.V(4).InfoS("Backing Off restarting container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
  1239  			return err
  1240  		}
  1241  
  1242  		metrics.StartedContainersTotal.WithLabelValues(metricLabel).Inc()
  1243  		if sc.HasWindowsHostProcessRequest(pod, spec.container) {
  1244  			metrics.StartedHostProcessContainersTotal.WithLabelValues(metricLabel).Inc()
  1245  		}
  1246  		klog.V(4).InfoS("Creating container in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod))
  1247  		// NOTE (aramase) podIPs are populated for single stack and dual stack clusters. Send only podIPs.
  1248  		if msg, err := m.startContainer(ctx, podSandboxID, podSandboxConfig, spec, pod, podStatus, pullSecrets, podIP, podIPs); err != nil {
  1249  			// startContainer() returns well-defined error codes that have reasonable cardinality for metrics and are
  1250  			// useful to cluster administrators to distinguish "server errors" from "user errors".
  1251  			metrics.StartedContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
  1252  			if sc.HasWindowsHostProcessRequest(pod, spec.container) {
  1253  				metrics.StartedHostProcessContainersErrorsTotal.WithLabelValues(metricLabel, err.Error()).Inc()
  1254  			}
  1255  			startContainerResult.Fail(err, msg)
  1256  			// known errors that are logged in other places are logged at higher levels here to avoid
  1257  			// repetitive log spam
  1258  			switch {
  1259  			case err == images.ErrImagePullBackOff:
  1260  				klog.V(3).InfoS("Container start failed in pod", "containerType", typeName, "container", spec.container, "pod", klog.KObj(pod), "containerMessage", msg, "err", err)
  1261  			default:
  1262  				utilruntime.HandleError(fmt.Errorf("%v %+v start failed in pod %v: %v: %s", typeName, spec.container, format.Pod(pod), err, msg))
  1263  			}
  1264  			return err
  1265  		}
  1266  
  1267  		return nil
  1268  	}
  1269  
  1270  	// Step 5: start ephemeral containers
  1271  	// These are started "prior" to init containers to allow running ephemeral containers even when there
  1272  	// are errors starting an init container. In practice init containers will start first since ephemeral
  1273  	// containers cannot be specified on pod creation.
  1274  	for _, idx := range podContainerChanges.EphemeralContainersToStart {
  1275  		start(ctx, "ephemeral container", metrics.EphemeralContainer, ephemeralContainerStartSpec(&pod.Spec.EphemeralContainers[idx]))
  1276  	}
  1277  
  1278  	if !utilfeature.DefaultFeatureGate.Enabled(features.SidecarContainers) {
  1279  		// Step 6: start the init container.
  1280  		if container := podContainerChanges.NextInitContainerToStart; container != nil {
  1281  			// Start the next init container.
  1282  			if err := start(ctx, "init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
  1283  				return
  1284  			}
  1285  
  1286  			// Successfully started the container; clear the corresponding failure entry.
  1287  			klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
  1288  		}
  1289  	} else {
  1290  		// Step 6: start init containers.
  1291  		for _, idx := range podContainerChanges.InitContainersToStart {
  1292  			container := &pod.Spec.InitContainers[idx]
  1293  			// Start the next init container.
  1294  			if err := start(ctx, "init container", metrics.InitContainer, containerStartSpec(container)); err != nil {
  1295  				if types.IsRestartableInitContainer(container) {
  1296  					klog.V(4).InfoS("Failed to start the restartable init container for the pod, skipping", "initContainerName", container.Name, "pod", klog.KObj(pod))
  1297  					continue
  1298  				}
  1299  				klog.V(4).InfoS("Failed to initialize the pod, as the init container failed to start, aborting", "initContainerName", container.Name, "pod", klog.KObj(pod))
  1300  				return
  1301  			}
  1302  
  1303  			// Successfully started the container; clear the corresponding failure entry.
  1304  			klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
  1305  		}
  1306  	}
  1307  
  1308  	// Step 7: For containers in the podContainerChanges.ContainersToUpdate[CPU,Memory] lists, invoke UpdateContainerResources.
  1309  	if isInPlacePodVerticalScalingAllowed(pod) {
  1310  		if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
  1311  			m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
  1312  		}
  1313  	}
  1314  
  1315  	// Step 8: start containers in podContainerChanges.ContainersToStart.
  1316  	for _, idx := range podContainerChanges.ContainersToStart {
  1317  		start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
  1318  	}
  1319  
  1320  	return
  1321  }
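
// The following is an editorial sketch, not part of the kubelet: it
// illustrates the sysctl separator normalization that SyncPod performs
// before creating the sandbox in Step 4 above. The sysctl name and value
// are hypothetical examples.
func exampleSysctlSeparatorConversion() {
	secCtx := &v1.PodSecurityContext{
		Sysctls: []v1.Sysctl{{Name: "net/ipv4/ip_forward", Value: "1"}},
	}
	// Rewrites slash-separated names to dot-separated ones in place;
	// secCtx.Sysctls[0].Name becomes "net.ipv4.ip_forward".
	sysctl.ConvertPodSysctlsVariableToDotsSeparator(secCtx)
}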
  1322  
  1323  // doBackOff reports whether the container is still in backoff. If it is, it also
  1324  // returns a detailed error message and a brief backoff error (ErrCrashLoopBackOff).
  1325  func (m *kubeGenericRuntimeManager) doBackOff(pod *v1.Pod, container *v1.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, string, error) {
  1326  	var cStatus *kubecontainer.Status
  1327  	for _, c := range podStatus.ContainerStatuses {
  1328  		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
  1329  			cStatus = c
  1330  			break
  1331  		}
  1332  	}
  1333  
  1334  	if cStatus == nil {
  1335  		return false, "", nil
  1336  	}
  1337  
  1338  	klog.V(3).InfoS("Checking backoff for container in pod", "containerName", container.Name, "pod", klog.KObj(pod))
  1339  	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
  1340  	ts := cStatus.FinishedAt
  1341  	// backOff requires a unique key to identify the container.
  1342  	key := getStableKey(pod, container)
  1343  	if backOff.IsInBackOffSince(key, ts) {
  1344  		if containerRef, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
  1345  			m.recorder.Eventf(containerRef, v1.EventTypeWarning, events.BackOffStartContainer,
  1346  				fmt.Sprintf("Back-off restarting failed container %s in pod %s", container.Name, format.Pod(pod)))
  1347  		}
  1348  		err := fmt.Errorf("back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod))
  1349  		klog.V(3).InfoS("Back-off restarting failed container", "err", err.Error())
  1350  		return true, err.Error(), kubecontainer.ErrCrashLoopBackOff
  1351  	}
  1352  
  1353  	backOff.Next(key, ts)
  1354  	return false, "", nil
  1355  }
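
// The following is an editorial sketch, not part of the kubelet: it shows
// how the flowcontrol.Backoff primitive behind doBackOff behaves. The key
// and durations are hypothetical; the kubelet derives the real key from the
// pod and container via getStableKey and configures its backoff elsewhere.
func exampleContainerBackOff() {
	backOff := flowcontrol.NewBackOff(10*time.Second, 5*time.Minute)
	key := "poduid_containername" // hypothetical stable key
	finishedAt := time.Now()      // stand-in for cStatus.FinishedAt

	if backOff.IsInBackOffSince(key, finishedAt) {
		// Still inside the current backoff window; Get reports the delay.
		fmt.Printf("back-off %v\n", backOff.Get(key))
		return
	}
	// Not in backoff: record this attempt so a quick subsequent failure
	// doubles the delay, up to the configured maximum.
	backOff.Next(key, finishedAt)
}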
  1356  
  1357  // KillPod kills all the containers of a pod. Pod may be nil; running pod must not be.
  1358  // gracePeriodOverride, if specified, allows the caller to override the pod's default grace period.
  1359  // Only hard-kill paths in the kubelet may specify a gracePeriodOverride, so as not to corrupt user data;
  1360  // it is useful for SIGKILL in hard-eviction scenarios, or for the max grace period during soft eviction.
  1361  func (m *kubeGenericRuntimeManager) KillPod(ctx context.Context, pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
  1362  	err := m.killPodWithSyncResult(ctx, pod, runningPod, gracePeriodOverride)
  1363  	return err.Error()
  1364  }
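
// The following is an editorial sketch, not part of the kubelet: it shows
// why KillPod can simply return err.Error() above. killPodWithSyncResult
// returns a kubecontainer.PodSyncResult, whose Error method aggregates the
// per-step failures. The container name is hypothetical.
func examplePodSyncResultAggregation() error {
	var result kubecontainer.PodSyncResult
	killResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, "hypothetical-container")
	result.AddSyncResult(killResult)
	// Mark the step as failed; the aggregate error is then non-nil.
	killResult.Fail(kubecontainer.ErrKillContainer, "simulated failure")
	return result.Error()
}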
  1365  
  1366  // killPodWithSyncResult kills a runningPod and returns SyncResult.
  1367  // Note: The pod passed in could be *nil* when the kubelet restarted.
  1368  func (m *kubeGenericRuntimeManager) killPodWithSyncResult(ctx context.Context, pod *v1.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
  1369  	killContainerResults := m.killContainersWithSyncResult(ctx, pod, runningPod, gracePeriodOverride)
  1370  	for _, containerResult := range killContainerResults {
  1371  		result.AddSyncResult(containerResult)
  1372  	}
  1373  
  1374  	// Stop the sandbox; it will be removed in GarbageCollect.
  1375  	killSandboxResult := kubecontainer.NewSyncResult(kubecontainer.KillPodSandbox, runningPod.ID)
  1376  	result.AddSyncResult(killSandboxResult)
  1377  	// Stop all sandboxes belonging to the same pod.
  1378  	for _, podSandbox := range runningPod.Sandboxes {
  1379  		if err := m.runtimeService.StopPodSandbox(ctx, podSandbox.ID.ID); err != nil && !crierror.IsNotFound(err) {
  1380  			killSandboxResult.Fail(kubecontainer.ErrKillPodSandbox, err.Error())
  1381  			klog.ErrorS(err, "Failed to stop sandbox", "podSandboxID", podSandbox.ID)
  1382  		}
  1383  	}
  1384  
  1385  	return
  1386  }
  1387  
  1388  func (m *kubeGenericRuntimeManager) GeneratePodStatus(event *runtimeapi.ContainerEventResponse) (*kubecontainer.PodStatus, error) {
  1389  	podIPs := m.determinePodSandboxIPs(event.PodSandboxStatus.Metadata.Namespace, event.PodSandboxStatus.Metadata.Name, event.PodSandboxStatus)
  1390  
  1391  	kubeContainerStatuses := []*kubecontainer.Status{}
  1392  	for _, status := range event.ContainersStatuses {
  1393  		kubeContainerStatuses = append(kubeContainerStatuses, m.convertToKubeContainerStatus(status))
  1394  	}
  1395  
  1396  	sort.Sort(containerStatusByCreated(kubeContainerStatuses))
  1397  
  1398  	return &kubecontainer.PodStatus{
  1399  		ID:                kubetypes.UID(event.PodSandboxStatus.Metadata.Uid),
  1400  		Name:              event.PodSandboxStatus.Metadata.Name,
  1401  		Namespace:         event.PodSandboxStatus.Metadata.Namespace,
  1402  		IPs:               podIPs,
  1403  		SandboxStatuses:   []*runtimeapi.PodSandboxStatus{event.PodSandboxStatus},
  1404  		ContainerStatuses: kubeContainerStatuses,
  1405  	}, nil
  1406  }
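
// The following is an editorial sketch, not part of the kubelet: it shows
// the containerStatusByCreated ordering used by GeneratePodStatus above,
// which sorts container statuses newest-first by creation time. The names
// and timestamps are hypothetical.
func exampleSortContainerStatuses() {
	statuses := []*kubecontainer.Status{
		{Name: "older", CreatedAt: time.Unix(100, 0)},
		{Name: "newer", CreatedAt: time.Unix(200, 0)},
	}
	sort.Sort(containerStatusByCreated(statuses))
	// statuses[0] is now "newer", the most recently created container.
}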
  1407  
  1408  // GetPodStatus retrieves the status of the pod, including the
  1409  // information of all containers in the pod that are visible in Runtime.
  1410  func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
  1411  	// We retain the restart count of a container as a container label. Each time
  1412  	// a container restarts, the kubelet reads the restart count from the
  1413  	// registered dead container, increments it to get the new restart count, and
  1414  	// then adds a label with the new restart count to the newly started container.
  1415  	// However, this method has some limitations:
  1416  	//	1. When all dead containers have been garbage collected, the container
  1417  	//	status can no longer recover the historical value and will be
  1418  	//	*inaccurate*. Fortunately, the chance of this is really slim.
  1419  	//	2. For old containers that have no restart count label, we can only
  1420  	//	assume their restart count is 0.
  1421  	// In any case, we only promised "best-effort" restart count reporting, so we
  1422  	// can ignore these limitations for now.
  1423  	// TODO: move this comment to SyncPod.
  1424  	podSandboxIDs, err := m.getSandboxIDByPodUID(ctx, uid, nil)
  1425  	if err != nil {
  1426  		return nil, err
  1427  	}
  1428  
  1429  	pod := &v1.Pod{
  1430  		ObjectMeta: metav1.ObjectMeta{
  1431  			Name:      name,
  1432  			Namespace: namespace,
  1433  			UID:       uid,
  1434  		},
  1435  	}
  1436  
  1437  	podFullName := format.Pod(pod)
  1438  
  1439  	klog.V(4).InfoS("getSandboxIDByPodUID got sandbox IDs for pod", "podSandboxID", podSandboxIDs, "pod", klog.KObj(pod))
  1440  
  1441  	sandboxStatuses := []*runtimeapi.PodSandboxStatus{}
  1442  	containerStatuses := []*kubecontainer.Status{}
  1443  	var timestamp time.Time
  1444  
  1445  	podIPs := []string{}
  1446  	for idx, podSandboxID := range podSandboxIDs {
  1447  		resp, err := m.runtimeService.PodSandboxStatus(ctx, podSandboxID, false)
  1448  		// Between the list (getSandboxIDByPodUID) and the check (PodSandboxStatus)
  1449  		// another thread may have removed the sandbox; that is normal. The list call
  1450  		// never fails because a pod sandbox does not exist, so this method should not
  1451  		// either; instead we act as if the sandbox was never listed and ignore the error.
  1452  		if crierror.IsNotFound(err) {
  1453  			continue
  1454  		}
  1455  		if err != nil {
  1456  			klog.ErrorS(err, "PodSandboxStatus of sandbox for pod", "podSandboxID", podSandboxID, "pod", klog.KObj(pod))
  1457  			return nil, err
  1458  		}
  1459  		if resp.GetStatus() == nil {
  1460  			return nil, errors.New("pod sandbox status is nil")
  1461  		}
  1463  		sandboxStatuses = append(sandboxStatuses, resp.Status)
  1464  		// Only get pod IP from latest sandbox
  1465  		if idx == 0 && resp.Status.State == runtimeapi.PodSandboxState_SANDBOX_READY {
  1466  			podIPs = m.determinePodSandboxIPs(namespace, name, resp.Status)
  1467  		}
  1468  
  1469  		if idx == 0 && utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) {
  1470  			if resp.Timestamp == 0 {
  1471  				// If the Evented PLEG is enabled in the kubelet but not in the runtime,
  1472  				// the pod status we get will not have the timestamp set. For example,
  1473  				// the CI job 'pull-kubernetes-e2e-gce-alpha-features' runs with the
  1474  				// alpha feature gates enabled, which includes Evented PLEG, but uses a
  1475  				// runtime without Evented PLEG support.
  1476  				klog.V(4).InfoS("Runtime does not set pod status timestamp", "pod", klog.KObj(pod))
  1477  				containerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace)
  1478  				if err != nil {
  1479  					if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) {
  1480  						klog.ErrorS(err, "getPodContainerStatuses for pod failed", "pod", klog.KObj(pod))
  1481  					}
  1482  					return nil, err
  1483  				}
  1484  			} else {
  1485  				// Get the statuses of all containers visible to the pod and
  1486  				// timestamp from sandboxStatus.
  1487  				timestamp = time.Unix(resp.Timestamp, 0)
  1488  				for _, cs := range resp.ContainersStatuses {
  1489  					cStatus := m.convertToKubeContainerStatus(cs)
  1490  					containerStatuses = append(containerStatuses, cStatus)
  1491  				}
  1492  			}
  1493  		}
  1494  	}
  1495  
  1496  	if !utilfeature.DefaultFeatureGate.Enabled(features.EventedPLEG) {
  1497  		// Get statuses of all containers visible in the pod.
  1498  		containerStatuses, err = m.getPodContainerStatuses(ctx, uid, name, namespace)
  1499  		if err != nil {
  1500  			if m.logReduction.ShouldMessageBePrinted(err.Error(), podFullName) {
  1501  				klog.ErrorS(err, "getPodContainerStatuses for pod failed", "pod", klog.KObj(pod))
  1502  			}
  1503  			return nil, err
  1504  		}
  1505  	}
  1506  
  1507  	m.logReduction.ClearID(podFullName)
  1508  	return &kubecontainer.PodStatus{
  1509  		ID:                uid,
  1510  		Name:              name,
  1511  		Namespace:         namespace,
  1512  		IPs:               podIPs,
  1513  		SandboxStatuses:   sandboxStatuses,
  1514  		ContainerStatuses: containerStatuses,
  1515  		TimeStamp:         timestamp,
  1516  	}, nil
  1517  }
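
// The following is an editorial sketch, not part of the kubelet: it isolates
// the "treat NotFound as already gone" pattern that GetPodStatus applies when
// racing with sandbox removal. The sandbox ID is hypothetical.
func exampleTolerateNotFound(ctx context.Context, m *kubeGenericRuntimeManager) error {
	_, err := m.runtimeService.PodSandboxStatus(ctx, "hypothetical-sandbox-id", false)
	if crierror.IsNotFound(err) {
		// Another thread removed the sandbox between list and status;
		// behave as if it was never listed.
		return nil
	}
	return err
}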
  1518  
  1519  // GarbageCollect removes dead containers using the specified container gc policy.
  1520  func (m *kubeGenericRuntimeManager) GarbageCollect(ctx context.Context, gcPolicy kubecontainer.GCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
  1521  	return m.containerGC.GarbageCollect(ctx, gcPolicy, allSourcesReady, evictNonDeletedPods)
  1522  }
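
// The following is an editorial sketch, not part of the kubelet: it shows the
// shape of a kubecontainer.GCPolicy a caller might pass to GarbageCollect
// above. The values are hypothetical; the kubelet builds its real policy
// from configuration.
func exampleGCPolicy() kubecontainer.GCPolicy {
	return kubecontainer.GCPolicy{
		MinAge:             time.Minute, // keep dead containers at least this long
		MaxPerPodContainer: 1,           // dead containers kept per (pod, container) pair
		MaxContainers:      100,         // global cap on dead containers
	}
}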
  1523  
  1524  // UpdatePodCIDR is just a passthrough method to update the runtimeConfig of the shim
  1525  // with the podCIDR supplied by the kubelet.
  1526  func (m *kubeGenericRuntimeManager) UpdatePodCIDR(ctx context.Context, podCIDR string) error {
  1527  	// TODO(#35531): do we really want to write a method on this manager for each
  1528  	// field of the config?
  1529  	klog.InfoS("Updating runtime config through cri with podcidr", "CIDR", podCIDR)
  1530  	return m.runtimeService.UpdateRuntimeConfig(ctx,
  1531  		&runtimeapi.RuntimeConfig{
  1532  			NetworkConfig: &runtimeapi.NetworkConfig{
  1533  				PodCidr: podCIDR,
  1534  			},
  1535  		})
  1536  }
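
// The following is an editorial sketch, not part of the kubelet: a caller
// propagating a newly allocated node pod CIDR to the runtime through
// UpdatePodCIDR above. The CIDR value is hypothetical.
func exampleUpdatePodCIDR(ctx context.Context, m *kubeGenericRuntimeManager) error {
	return m.UpdatePodCIDR(ctx, "10.244.1.0/24")
}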
  1537  
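// CheckpointContainer is a passthrough to the runtime service's
// CheckpointContainer call.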
  1538  func (m *kubeGenericRuntimeManager) CheckpointContainer(ctx context.Context, options *runtimeapi.CheckpointContainerRequest) error {
  1539  	return m.runtimeService.CheckpointContainer(ctx, options)
  1540  }
  1541  
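// ListMetricDescriptors is a passthrough to the runtime service's
// ListMetricDescriptors call.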
  1542  func (m *kubeGenericRuntimeManager) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) {
  1543  	return m.runtimeService.ListMetricDescriptors(ctx)
  1544  }
  1545  
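// ListPodSandboxMetrics is a passthrough to the runtime service's
// ListPodSandboxMetrics call.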
  1546  func (m *kubeGenericRuntimeManager) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) {
  1547  	return m.runtimeService.ListPodSandboxMetrics(ctx)
  1548  }
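
// The following is an editorial sketch, not part of the kubelet: draining
// the two passthrough metrics APIs above. The logging is illustrative only.
func exampleListRuntimeMetrics(ctx context.Context, m *kubeGenericRuntimeManager) {
	descriptors, err := m.ListMetricDescriptors(ctx)
	if err != nil {
		klog.ErrorS(err, "ListMetricDescriptors failed")
		return
	}
	podMetrics, err := m.ListPodSandboxMetrics(ctx)
	if err != nil {
		klog.ErrorS(err, "ListPodSandboxMetrics failed")
		return
	}
	klog.InfoS("Runtime metrics", "descriptors", len(descriptors), "podSandboxes", len(podMetrics))
}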